mirror of
https://github.com/fatedier/frp.git
synced 2025-01-22 09:32:07 +00:00
add packages
This commit is contained in:
parent
0559865fe5
commit
9e0fd0c4ef
30
Godeps/Godeps.json
generated
30
Godeps/Godeps.json
generated
@ -1,6 +1,6 @@
|
||||
{
|
||||
"ImportPath": "github.com/fatedier/frp",
|
||||
"GoVersion": "go1.8",
|
||||
"GoVersion": "go1.9",
|
||||
"GodepVersion": "v79",
|
||||
"Packages": [
|
||||
"./..."
|
||||
@ -22,8 +22,13 @@
|
||||
},
|
||||
{
|
||||
"ImportPath": "github.com/fatedier/beego/logs",
|
||||
"Comment": "v1.7.2-72-gf73c369",
|
||||
"Rev": "f73c3692bbd70a83728cb59b2c0423ff95e4ecea"
|
||||
"Comment": "v1.7.2-73-g6c6a4f5",
|
||||
"Rev": "6c6a4f5bd5eb5a39f7e289b8f345b55f75e7e3e8"
|
||||
},
|
||||
{
|
||||
"ImportPath": "github.com/fatedier/kcp-go",
|
||||
"Comment": "v3.15-35-gcd167d2",
|
||||
"Rev": "cd167d2f15f451b0f33780ce862fca97adc0331e"
|
||||
},
|
||||
{
|
||||
"ImportPath": "github.com/golang/snappy",
|
||||
@ -64,6 +69,25 @@
|
||||
"Comment": "v1.1.4-25-g2402e8e",
|
||||
"Rev": "2402e8e7a02fc811447d11f881aa9746cdc57983"
|
||||
},
|
||||
{
|
||||
"ImportPath": "github.com/templexxx/cpufeat",
|
||||
"Rev": "3794dfbfb04749f896b521032f69383f24c3687e"
|
||||
},
|
||||
{
|
||||
"ImportPath": "github.com/templexxx/reedsolomon",
|
||||
"Comment": "0.1.1-4-g7092926",
|
||||
"Rev": "7092926d7d05c415fabb892b1464a03f8228ab80"
|
||||
},
|
||||
{
|
||||
"ImportPath": "github.com/templexxx/xor",
|
||||
"Comment": "0.1.2",
|
||||
"Rev": "0af8e873c554da75f37f2049cdffda804533d44c"
|
||||
},
|
||||
{
|
||||
"ImportPath": "github.com/tjfoc/gmsm/sm4",
|
||||
"Comment": "v1.0-42-g21d76de",
|
||||
"Rev": "21d76dee237dbbc8dfe1510000b9bf2733635aa1"
|
||||
},
|
||||
{
|
||||
"ImportPath": "github.com/vaughan0/go-ini",
|
||||
"Rev": "a98ad7ee00ec53921f08832bc06ecf7fd600e6a1"
|
||||
|
@ -15,6 +15,7 @@
|
||||
package client
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"io"
|
||||
"net"
|
||||
@ -29,6 +30,7 @@ import (
|
||||
frpIo "github.com/fatedier/frp/utils/io"
|
||||
"github.com/fatedier/frp/utils/log"
|
||||
frpNet "github.com/fatedier/frp/utils/net"
|
||||
"github.com/fatedier/frp/utils/pool"
|
||||
)
|
||||
|
||||
// Proxy defines how to deal with work connections for different proxy type.
|
||||
@ -248,16 +250,24 @@ func (pxy *XtcpProxy) InWorkConn(conn frpNet.Conn) {
|
||||
return
|
||||
}
|
||||
|
||||
// Wait for client address at most 10 seconds.
|
||||
// Wait for client address at most 5 seconds.
|
||||
var natHoleRespMsg msg.NatHoleResp
|
||||
clientConn.SetReadDeadline(time.Now().Add(10 * time.Second))
|
||||
err = msg.ReadMsgInto(clientConn, &natHoleRespMsg)
|
||||
clientConn.SetReadDeadline(time.Now().Add(5 * time.Second))
|
||||
|
||||
buf := pool.GetBuf(1024)
|
||||
n, err := clientConn.Read(buf)
|
||||
if err != nil {
|
||||
pxy.Error("get natHoleRespMsg error: %v", err)
|
||||
return
|
||||
}
|
||||
err = msg.ReadMsgInto(bytes.NewReader(buf[:n]), &natHoleRespMsg)
|
||||
if err != nil {
|
||||
pxy.Error("get natHoleRespMsg error: %v", err)
|
||||
return
|
||||
}
|
||||
clientConn.SetReadDeadline(time.Time{})
|
||||
clientConn.Close()
|
||||
pxy.Trace("get natHoleRespMsg, sid [%s], client address [%s]", natHoleRespMsg.Sid, natHoleRespMsg.ClientAddr)
|
||||
|
||||
// Send sid to vistor udp address.
|
||||
time.Sleep(time.Second)
|
||||
|
@ -15,9 +15,11 @@
|
||||
package client
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"io"
|
||||
"net"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
@ -214,47 +216,67 @@ func (sv *XtcpVistor) handleConn(userConn frpNet.Conn) {
|
||||
}
|
||||
|
||||
// Wait for client address at most 10 seconds.
|
||||
var natHoleResp msg.NatHoleResp
|
||||
var natHoleRespMsg msg.NatHoleResp
|
||||
vistorConn.SetReadDeadline(time.Now().Add(10 * time.Second))
|
||||
err = msg.ReadMsgInto(vistorConn, &natHoleResp)
|
||||
buf := pool.GetBuf(1024)
|
||||
n, err := vistorConn.Read(buf)
|
||||
if err != nil {
|
||||
sv.Warn("get natHoleRespMsg error: %v", err)
|
||||
return
|
||||
}
|
||||
|
||||
err = msg.ReadMsgInto(bytes.NewReader(buf[:n]), &natHoleRespMsg)
|
||||
if err != nil {
|
||||
sv.Warn("get natHoleRespMsg error: %v", err)
|
||||
return
|
||||
}
|
||||
vistorConn.SetReadDeadline(time.Time{})
|
||||
pool.PutBuf(buf)
|
||||
|
||||
sv.Trace("get natHoleRespMsg, sid [%s], client address [%s]", natHoleRespMsg.Sid, natHoleRespMsg.ClientAddr)
|
||||
|
||||
// Close vistorConn, so we can use its local address.
|
||||
vistorConn.Close()
|
||||
|
||||
// Send detect message for all ports of client in case different NAT type.
|
||||
array := strings.Split(natHoleResp.ClientAddr, ":")
|
||||
if len(array) <= 0 {
|
||||
sv.Error("get natHoleResp client address error: %s", natHoleResp.ClientAddr)
|
||||
// Send detect message.
|
||||
array := strings.Split(natHoleRespMsg.ClientAddr, ":")
|
||||
if len(array) <= 1 {
|
||||
sv.Error("get natHoleResp client address error: %s", natHoleRespMsg.ClientAddr)
|
||||
return
|
||||
}
|
||||
laddr, _ := net.ResolveUDPAddr("udp", vistorConn.LocalAddr().String())
|
||||
/*
|
||||
for i := 1000; i < 65000; i++ {
|
||||
sv.sendDetectMsg(array[0], int64(i), laddr)
|
||||
sv.sendDetectMsg(array[0], int64(i), laddr, "a")
|
||||
}
|
||||
*/
|
||||
port, err := strconv.ParseInt(array[1], 10, 64)
|
||||
if err != nil {
|
||||
sv.Error("get natHoleResp client address error: %s", natHoleRespMsg.ClientAddr)
|
||||
return
|
||||
}
|
||||
sv.sendDetectMsg(array[0], int64(port), laddr, []byte(natHoleRespMsg.Sid))
|
||||
sv.Trace("send all detect msg done")
|
||||
|
||||
// Listen for vistorConn's address and wait for client connection.
|
||||
lConn, _ := net.ListenUDP("udp", laddr)
|
||||
lConn.SetReadDeadline(time.Now().Add(10 * time.Second))
|
||||
lConn.SetReadDeadline(time.Now().Add(5 * time.Second))
|
||||
sidBuf := pool.GetBuf(1024)
|
||||
n, _, err := lConn.ReadFromUDP(sidBuf)
|
||||
n, _, err = lConn.ReadFromUDP(sidBuf)
|
||||
if err != nil {
|
||||
sv.Warn("get sid from client error: %v", err)
|
||||
return
|
||||
}
|
||||
lConn.SetReadDeadline(time.Time{})
|
||||
if string(sidBuf[:n]) != natHoleResp.Sid {
|
||||
if string(sidBuf[:n]) != natHoleRespMsg.Sid {
|
||||
sv.Warn("incorrect sid from client")
|
||||
return
|
||||
}
|
||||
sv.Info("nat hole connection make success, sid [%s]", string(sidBuf[:n]))
|
||||
pool.PutBuf(sidBuf)
|
||||
|
||||
var remote io.ReadWriteCloser
|
||||
remote, err = frpNet.NewKcpConnFromUdp(lConn, false, natHoleResp.ClientAddr)
|
||||
remote, err = frpNet.NewKcpConnFromUdp(lConn, false, natHoleRespMsg.ClientAddr)
|
||||
if err != nil {
|
||||
sv.Error("create kcp connection from udp connection error: %v", err)
|
||||
return
|
||||
@ -275,7 +297,7 @@ func (sv *XtcpVistor) handleConn(userConn frpNet.Conn) {
|
||||
frpIo.Join(userConn, remote)
|
||||
}
|
||||
|
||||
func (sv *XtcpVistor) sendDetectMsg(addr string, port int64, laddr *net.UDPAddr) (err error) {
|
||||
func (sv *XtcpVistor) sendDetectMsg(addr string, port int64, laddr *net.UDPAddr, content []byte) (err error) {
|
||||
daddr, err := net.ResolveUDPAddr("udp", fmt.Sprintf("%s:%d", addr, port))
|
||||
if err != nil {
|
||||
return err
|
||||
@ -289,7 +311,7 @@ func (sv *XtcpVistor) sendDetectMsg(addr string, port int64, laddr *net.UDPAddr)
|
||||
uConn := ipv4.NewConn(tConn)
|
||||
uConn.SetTTL(3)
|
||||
|
||||
tConn.Write([]byte(fmt.Sprintf("%d", port)))
|
||||
tConn.Write(content)
|
||||
tConn.Close()
|
||||
return nil
|
||||
}
|
||||
|
3
vendor/github.com/fatedier/beego/logs/console.go
generated
vendored
3
vendor/github.com/fatedier/beego/logs/console.go
generated
vendored
@ -42,6 +42,7 @@ var colors = []brush{
|
||||
newBrush("1;32"), // Notice green
|
||||
newBrush("1;34"), // Informational blue
|
||||
newBrush("1;34"), // Debug blue
|
||||
newBrush("1;34"), // Trace blue
|
||||
}
|
||||
|
||||
// consoleWriter implements LoggerInterface and writes messages to terminal.
|
||||
@ -55,7 +56,7 @@ type consoleWriter struct {
|
||||
func NewConsole() Logger {
|
||||
cw := &consoleWriter{
|
||||
lg: newLogWriter(os.Stdout),
|
||||
Level: LevelDebug,
|
||||
Level: LevelTrace,
|
||||
Colorful: runtime.GOOS != "windows",
|
||||
}
|
||||
return cw
|
||||
|
24
vendor/github.com/fatedier/kcp-go/.gitignore
generated
vendored
Normal file
24
vendor/github.com/fatedier/kcp-go/.gitignore
generated
vendored
Normal file
@ -0,0 +1,24 @@
|
||||
# Compiled Object files, Static and Dynamic libs (Shared Objects)
|
||||
*.o
|
||||
*.a
|
||||
*.so
|
||||
|
||||
# Folders
|
||||
_obj
|
||||
_test
|
||||
|
||||
# Architecture specific extensions/prefixes
|
||||
*.[568vq]
|
||||
[568vq].out
|
||||
|
||||
*.cgo1.go
|
||||
*.cgo2.c
|
||||
_cgo_defun.c
|
||||
_cgo_gotypes.go
|
||||
_cgo_export.*
|
||||
|
||||
_testmain.go
|
||||
|
||||
*.exe
|
||||
*.test
|
||||
*.prof
|
15
vendor/github.com/fatedier/kcp-go/.travis.yml
generated
vendored
Normal file
15
vendor/github.com/fatedier/kcp-go/.travis.yml
generated
vendored
Normal file
@ -0,0 +1,15 @@
|
||||
language: go
|
||||
go:
|
||||
- 1.9
|
||||
|
||||
before_install:
|
||||
- go get -t -v ./...
|
||||
|
||||
install:
|
||||
- go get github.com/xtaci/kcp-go
|
||||
|
||||
script:
|
||||
- go test -coverprofile=coverage.txt -covermode=atomic -bench .
|
||||
|
||||
after_success:
|
||||
- bash <(curl -s https://codecov.io/bash)
|
22
vendor/github.com/fatedier/kcp-go/LICENSE
generated
vendored
Normal file
22
vendor/github.com/fatedier/kcp-go/LICENSE
generated
vendored
Normal file
@ -0,0 +1,22 @@
|
||||
The MIT License (MIT)
|
||||
|
||||
Copyright (c) 2015 Daniel Fu
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
||||
|
172
vendor/github.com/fatedier/kcp-go/README.md
generated
vendored
Normal file
172
vendor/github.com/fatedier/kcp-go/README.md
generated
vendored
Normal file
@ -0,0 +1,172 @@
|
||||
<img src="kcp-go.png" alt="kcp-go" height="50px" />
|
||||
|
||||
|
||||
[![GoDoc][1]][2] [![Powered][9]][10] [![MIT licensed][11]][12] [![Build Status][3]][4] [![Go Report Card][5]][6] [![Coverage Status][7]][8]
|
||||
|
||||
[1]: https://godoc.org/github.com/xtaci/kcp-go?status.svg
|
||||
[2]: https://godoc.org/github.com/xtaci/kcp-go
|
||||
[3]: https://travis-ci.org/xtaci/kcp-go.svg?branch=master
|
||||
[4]: https://travis-ci.org/xtaci/kcp-go
|
||||
[5]: https://goreportcard.com/badge/github.com/xtaci/kcp-go
|
||||
[6]: https://goreportcard.com/report/github.com/xtaci/kcp-go
|
||||
[7]: https://codecov.io/gh/xtaci/kcp-go/branch/master/graph/badge.svg
|
||||
[8]: https://codecov.io/gh/xtaci/kcp-go
|
||||
[9]: https://img.shields.io/badge/KCP-Powered-blue.svg
|
||||
[10]: https://github.com/skywind3000/kcp
|
||||
[11]: https://img.shields.io/badge/license-MIT-blue.svg
|
||||
[12]: LICENSE
|
||||
|
||||
## Introduction
|
||||
|
||||
**kcp-go** is a **Production-Grade Reliable-UDP** library for [golang](https://golang.org/).
|
||||
|
||||
It provides **fast, ordered and error-checked** delivery of streams over **UDP** packets, has been well tested with opensource project [kcptun](https://github.com/xtaci/kcptun). Millions of devices(from low-end MIPS routers to high-end servers) are running with **kcp-go** at present, including applications like **online games, live broadcasting, file synchronization and network acceleration**.
|
||||
|
||||
[Latest Release](https://github.com/xtaci/kcp-go/releases)
|
||||
|
||||
## Features
|
||||
|
||||
1. Optimized for **Realtime Online Games, Audio/Video Streaming and Latency-Sensitive Distributed Consensus**.
|
||||
1. Compatible with [skywind3000's](https://github.com/skywind3000) C version with language specific optimizations.
|
||||
1. **Cache friendly** and **Memory optimized** design, offers extremely **High Performance** core.
|
||||
1. Handles **>5K concurrent connections** on a single commodity server.
|
||||
1. Compatible with [net.Conn](https://golang.org/pkg/net/#Conn) and [net.Listener](https://golang.org/pkg/net/#Listener), a drop-in replacement for [net.TCPConn](https://golang.org/pkg/net/#TCPConn).
|
||||
1. [FEC(Forward Error Correction)](https://en.wikipedia.org/wiki/Forward_error_correction) Support with [Reed-Solomon Codes](https://en.wikipedia.org/wiki/Reed%E2%80%93Solomon_error_correction)
|
||||
1. Packet level encryption support with [AES](https://en.wikipedia.org/wiki/Advanced_Encryption_Standard), [TEA](https://en.wikipedia.org/wiki/Tiny_Encryption_Algorithm), [3DES](https://en.wikipedia.org/wiki/Triple_DES), [Blowfish](https://en.wikipedia.org/wiki/Blowfish_(cipher)), [Cast5](https://en.wikipedia.org/wiki/CAST-128), [Salsa20](https://en.wikipedia.org/wiki/Salsa20), etc. in [CFB](https://en.wikipedia.org/wiki/Block_cipher_mode_of_operation#Cipher_Feedback_.28CFB.29) mode.
|
||||
1. **Fixed number of goroutines** created for the entire server application, minimized goroutine context switch.
|
||||
|
||||
## Conventions
|
||||
|
||||
Control messages like **SYN/FIN/RST** in TCP **are not defined** in KCP, you need some **keepalive/heartbeat mechanism** in the application-level. A real world example is to use some **multiplexing** protocol over session, such as [smux](https://github.com/xtaci/smux)(with embedded keepalive mechanism), see [kcptun](https://github.com/xtaci/kcptun) for example.
|
||||
|
||||
## Documentation
|
||||
|
||||
For complete documentation, see the associated [Godoc](https://godoc.org/github.com/xtaci/kcp-go).
|
||||
|
||||
## Specification
|
||||
|
||||
<img src="frame.png" alt="Frame Format" height="109px" />
|
||||
|
||||
```
|
||||
+-----------------+
|
||||
| SESSION |
|
||||
+-----------------+
|
||||
| KCP(ARQ) |
|
||||
+-----------------+
|
||||
| FEC(OPTIONAL) |
|
||||
+-----------------+
|
||||
| CRYPTO(OPTIONAL)|
|
||||
+-----------------+
|
||||
| UDP(PACKET) |
|
||||
+-----------------+
|
||||
| IP |
|
||||
+-----------------+
|
||||
| LINK |
|
||||
+-----------------+
|
||||
| PHY |
|
||||
+-----------------+
|
||||
(LAYER MODEL OF KCP-GO)
|
||||
```
|
||||
|
||||
|
||||
## Usage
|
||||
|
||||
Client: [full demo](https://github.com/xtaci/kcptun/blob/master/client/main.go)
|
||||
```go
|
||||
kcpconn, err := kcp.DialWithOptions("192.168.0.1:10000", nil, 10, 3)
|
||||
```
|
||||
Server: [full demo](https://github.com/xtaci/kcptun/blob/master/server/main.go)
|
||||
```go
|
||||
lis, err := kcp.ListenWithOptions(":10000", nil, 10, 3)
|
||||
```
|
||||
|
||||
## Performance
|
||||
```
|
||||
Model Name: MacBook Pro
|
||||
Model Identifier: MacBookPro12,1
|
||||
Processor Name: Intel Core i5
|
||||
Processor Speed: 2.7 GHz
|
||||
Number of Processors: 1
|
||||
Total Number of Cores: 2
|
||||
L2 Cache (per Core): 256 KB
|
||||
L3 Cache: 3 MB
|
||||
Memory: 8 GB
|
||||
```
|
||||
```
|
||||
$ go test -v -run=^$ -bench .
|
||||
beginning tests, encryption:salsa20, fec:10/3
|
||||
BenchmarkAES128-4 200000 8256 ns/op 363.33 MB/s 0 B/op 0 allocs/op
|
||||
BenchmarkAES192-4 200000 9153 ns/op 327.74 MB/s 0 B/op 0 allocs/op
|
||||
BenchmarkAES256-4 200000 10079 ns/op 297.64 MB/s 0 B/op 0 allocs/op
|
||||
BenchmarkTEA-4 100000 18643 ns/op 160.91 MB/s 0 B/op 0 allocs/op
|
||||
BenchmarkXOR-4 5000000 316 ns/op 9486.46 MB/s 0 B/op 0 allocs/op
|
||||
BenchmarkBlowfish-4 50000 35643 ns/op 84.17 MB/s 0 B/op 0 allocs/op
|
||||
BenchmarkNone-4 30000000 56.2 ns/op 53371.83 MB/s 0 B/op 0 allocs/op
|
||||
BenchmarkCast5-4 30000 44744 ns/op 67.05 MB/s 0 B/op 0 allocs/op
|
||||
Benchmark3DES-4 2000 639839 ns/op 4.69 MB/s 2 B/op 0 allocs/op
|
||||
BenchmarkTwofish-4 30000 43368 ns/op 69.17 MB/s 0 B/op 0 allocs/op
|
||||
BenchmarkXTEA-4 30000 57673 ns/op 52.02 MB/s 0 B/op 0 allocs/op
|
||||
BenchmarkSalsa20-4 300000 3917 ns/op 765.80 MB/s 0 B/op 0 allocs/op
|
||||
BenchmarkFlush-4 10000000 226 ns/op 0 B/op 0 allocs/op
|
||||
BenchmarkEchoSpeed4K-4 5000 300030 ns/op 13.65 MB/s 5672 B/op 177 allocs/op
|
||||
BenchmarkEchoSpeed64K-4 500 3202335 ns/op 20.47 MB/s 73295 B/op 2198 allocs/op
|
||||
BenchmarkEchoSpeed512K-4 50 24926924 ns/op 21.03 MB/s 659339 B/op 17602 allocs/op
|
||||
BenchmarkEchoSpeed1M-4 20 64857821 ns/op 16.17 MB/s 1772437 B/op 42869 allocs/op
|
||||
BenchmarkSinkSpeed4K-4 30000 50230 ns/op 81.54 MB/s 2058 B/op 48 allocs/op
|
||||
BenchmarkSinkSpeed64K-4 2000 648718 ns/op 101.02 MB/s 31165 B/op 687 allocs/op
|
||||
BenchmarkSinkSpeed256K-4 300 4635905 ns/op 113.09 MB/s 286229 B/op 5516 allocs/op
|
||||
BenchmarkSinkSpeed1M-4 200 9566933 ns/op 109.60 MB/s 463771 B/op 10701 allocs/op
|
||||
PASS
|
||||
ok _/Users/xtaci/.godeps/src/github.com/xtaci/kcp-go 39.689s
|
||||
```
|
||||
|
||||
## Design Considerations
|
||||
|
||||
1. slice vs. container/list
|
||||
|
||||
`kcp.flush()` loops through the send queue to check for retransmissions every 20ms (the update interval).
|
||||
|
||||
I've written a benchmark comparing a sequential loop through a *slice* and a *container/list* here:
|
||||
|
||||
https://github.com/xtaci/notes/blob/master/golang/benchmark2/cachemiss_test.go
|
||||
|
||||
```
|
||||
BenchmarkLoopSlice-4 2000000000 0.39 ns/op
|
||||
BenchmarkLoopList-4 100000000 54.6 ns/op
|
||||
```
|
||||
|
||||
The list structure introduces **heavy cache misses** compared to a slice, which has better **locality**: 5000 connections with a window size of 32 and a 20ms interval cost 6us/0.03%(cpu) per `kcp.flush()` using a slice, versus 8.7ms/43.5%(cpu) using a list.
|
||||
|
||||
2. Timing accuracy vs. syscall clock_gettime
|
||||
|
||||
Timing is **critical** to **RTT estimator**, inaccurate timing introduces false retransmissions in KCP, but calling `time.Now()` costs 42 cycles(10.5ns on 4GHz CPU, 15.6ns on my MacBook Pro 2.7GHz), the benchmark for time.Now():
|
||||
|
||||
https://github.com/xtaci/notes/blob/master/golang/benchmark2/syscall_test.go
|
||||
|
||||
```
|
||||
BenchmarkNow-4 100000000 15.6 ns/op
|
||||
```
|
||||
|
||||
In kcp-go, the current time is updated upon return from each `kcp.output()` call, and each `kcp.flush()` gets the current time once. Most of the time, 5000 connections cost 5000 * 15.6ns = 78us (when no packet needs to be sent by `kcp.output()`); when transferring data at 10MB/s with a 1400-byte MTU, `kcp.output()` is called around 7500 times and `time.Now()` costs 117us in **every second**.
|
||||
|
||||
|
||||
## Tuning
|
||||
|
||||
Q: I'm handling >5K connections on my server. the CPU utilization is high.
|
||||
|
||||
A: A standalone `agent` or `gate` server for kcp-go is suggested, not only for CPU utilization, but also because it matters for the **precision** of RTT measurements, which indirectly affects retransmission. Increasing the update `interval` with `SetNoDelay`, e.g. `conn.SetNoDelay(1, 40, 1, 1)`, will dramatically reduce system load.
|
||||
|
||||
## Who is using this?
|
||||
|
||||
1. https://github.com/xtaci/kcptun -- A Secure Tunnel Based On KCP over UDP.
|
||||
2. https://github.com/getlantern/lantern -- Lantern delivers fast access to the open Internet.
|
||||
3. https://github.com/smallnest/rpcx -- An RPC service framework based on net/rpc like alibaba Dubbo and weibo Motan.
|
||||
4. https://github.com/gonet2/agent -- A gateway for games with stream multiplexing.
|
||||
5. https://github.com/syncthing/syncthing -- Open Source Continuous File Synchronization.
|
||||
6. https://play.google.com/store/apps/details?id=com.k17game.k3 -- Battle Zone - Earth 2048, a world-wide strategy game.
|
||||
|
||||
## Links
|
||||
|
||||
1. https://github.com/xtaci/libkcp -- FEC enhanced KCP session library for iOS/Android in C++
|
||||
2. https://github.com/skywind3000/kcp -- A Fast and Reliable ARQ Protocol
|
||||
3. https://github.com/templexxx/reedsolomon -- Reed-Solomon Erasure Coding in Go
|
288
vendor/github.com/fatedier/kcp-go/crypt.go
generated
vendored
Normal file
288
vendor/github.com/fatedier/kcp-go/crypt.go
generated
vendored
Normal file
@ -0,0 +1,288 @@
|
||||
package kcp
|
||||
|
||||
import (
|
||||
"crypto/aes"
|
||||
"crypto/cipher"
|
||||
"crypto/des"
|
||||
"crypto/sha1"
|
||||
|
||||
"github.com/templexxx/xor"
|
||||
"github.com/tjfoc/gmsm/sm4"
|
||||
|
||||
"golang.org/x/crypto/blowfish"
|
||||
"golang.org/x/crypto/cast5"
|
||||
"golang.org/x/crypto/pbkdf2"
|
||||
"golang.org/x/crypto/salsa20"
|
||||
"golang.org/x/crypto/tea"
|
||||
"golang.org/x/crypto/twofish"
|
||||
"golang.org/x/crypto/xtea"
|
||||
)
|
||||
|
||||
// initialVector is the fixed 16-byte input that the encrypt and decrypt
// helpers run through the block cipher to obtain the first keystream block.
var initialVector = []byte{167, 115, 79, 156, 18, 172, 27, 1, 164, 21, 242, 193, 252, 120, 230, 107}

// saltxor is the salt handed to PBKDF2 by NewSimpleXORBlockCrypt.
var saltxor = `sH3CIVoF#rWLtJo6`
|
||||
|
||||
// BlockCrypt is the packet-level cipher abstraction: an implementation
// transforms an entire byte slice in a single call.
//
// Note for implementers: the data handed to Encrypt carries a built-in
// nonce in its first 16 bytes.
type BlockCrypt interface {
	// Encrypt writes the encrypted form of the whole of src into dst.
	// dst and src are allowed to refer to the same memory.
	Encrypt(dst, src []byte)

	// Decrypt writes the decrypted form of the whole of src into dst.
	// dst and src are allowed to refer to the same memory.
	Decrypt(dst, src []byte)
}
|
||||
|
||||
type salsa20BlockCrypt struct {
|
||||
key [32]byte
|
||||
}
|
||||
|
||||
// NewSalsa20BlockCrypt https://en.wikipedia.org/wiki/Salsa20
|
||||
func NewSalsa20BlockCrypt(key []byte) (BlockCrypt, error) {
|
||||
c := new(salsa20BlockCrypt)
|
||||
copy(c.key[:], key)
|
||||
return c, nil
|
||||
}
|
||||
|
||||
func (c *salsa20BlockCrypt) Encrypt(dst, src []byte) {
|
||||
salsa20.XORKeyStream(dst[8:], src[8:], src[:8], &c.key)
|
||||
copy(dst[:8], src[:8])
|
||||
}
|
||||
func (c *salsa20BlockCrypt) Decrypt(dst, src []byte) {
|
||||
salsa20.XORKeyStream(dst[8:], src[8:], src[:8], &c.key)
|
||||
copy(dst[:8], src[:8])
|
||||
}
|
||||
|
||||
type sm4BlockCrypt struct {
|
||||
encbuf []byte
|
||||
decbuf []byte
|
||||
block cipher.Block
|
||||
}
|
||||
|
||||
// NewSM4BlockCrypt https://github.com/tjfoc/gmsm/tree/master/sm4
|
||||
func NewSM4BlockCrypt(key []byte) (BlockCrypt, error) {
|
||||
c := new(sm4BlockCrypt)
|
||||
block, err := sm4.NewCipher(key)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
c.block = block
|
||||
c.encbuf = make([]byte, sm4.BlockSize)
|
||||
c.decbuf = make([]byte, 2*sm4.BlockSize)
|
||||
return c, nil
|
||||
}
|
||||
|
||||
func (c *sm4BlockCrypt) Encrypt(dst, src []byte) { encrypt(c.block, dst, src, c.encbuf) }
|
||||
func (c *sm4BlockCrypt) Decrypt(dst, src []byte) { decrypt(c.block, dst, src, c.decbuf) }
|
||||
|
||||
type twofishBlockCrypt struct {
|
||||
encbuf []byte
|
||||
decbuf []byte
|
||||
block cipher.Block
|
||||
}
|
||||
|
||||
// NewTwofishBlockCrypt https://en.wikipedia.org/wiki/Twofish
|
||||
func NewTwofishBlockCrypt(key []byte) (BlockCrypt, error) {
|
||||
c := new(twofishBlockCrypt)
|
||||
block, err := twofish.NewCipher(key)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
c.block = block
|
||||
c.encbuf = make([]byte, twofish.BlockSize)
|
||||
c.decbuf = make([]byte, 2*twofish.BlockSize)
|
||||
return c, nil
|
||||
}
|
||||
|
||||
func (c *twofishBlockCrypt) Encrypt(dst, src []byte) { encrypt(c.block, dst, src, c.encbuf) }
|
||||
func (c *twofishBlockCrypt) Decrypt(dst, src []byte) { decrypt(c.block, dst, src, c.decbuf) }
|
||||
|
||||
type tripleDESBlockCrypt struct {
|
||||
encbuf []byte
|
||||
decbuf []byte
|
||||
block cipher.Block
|
||||
}
|
||||
|
||||
// NewTripleDESBlockCrypt https://en.wikipedia.org/wiki/Triple_DES
|
||||
func NewTripleDESBlockCrypt(key []byte) (BlockCrypt, error) {
|
||||
c := new(tripleDESBlockCrypt)
|
||||
block, err := des.NewTripleDESCipher(key)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
c.block = block
|
||||
c.encbuf = make([]byte, des.BlockSize)
|
||||
c.decbuf = make([]byte, 2*des.BlockSize)
|
||||
return c, nil
|
||||
}
|
||||
|
||||
func (c *tripleDESBlockCrypt) Encrypt(dst, src []byte) { encrypt(c.block, dst, src, c.encbuf) }
|
||||
func (c *tripleDESBlockCrypt) Decrypt(dst, src []byte) { decrypt(c.block, dst, src, c.decbuf) }
|
||||
|
||||
type cast5BlockCrypt struct {
|
||||
encbuf []byte
|
||||
decbuf []byte
|
||||
block cipher.Block
|
||||
}
|
||||
|
||||
// NewCast5BlockCrypt https://en.wikipedia.org/wiki/CAST-128
|
||||
func NewCast5BlockCrypt(key []byte) (BlockCrypt, error) {
|
||||
c := new(cast5BlockCrypt)
|
||||
block, err := cast5.NewCipher(key)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
c.block = block
|
||||
c.encbuf = make([]byte, cast5.BlockSize)
|
||||
c.decbuf = make([]byte, 2*cast5.BlockSize)
|
||||
return c, nil
|
||||
}
|
||||
|
||||
func (c *cast5BlockCrypt) Encrypt(dst, src []byte) { encrypt(c.block, dst, src, c.encbuf) }
|
||||
func (c *cast5BlockCrypt) Decrypt(dst, src []byte) { decrypt(c.block, dst, src, c.decbuf) }
|
||||
|
||||
type blowfishBlockCrypt struct {
|
||||
encbuf []byte
|
||||
decbuf []byte
|
||||
block cipher.Block
|
||||
}
|
||||
|
||||
// NewBlowfishBlockCrypt https://en.wikipedia.org/wiki/Blowfish_(cipher)
|
||||
func NewBlowfishBlockCrypt(key []byte) (BlockCrypt, error) {
|
||||
c := new(blowfishBlockCrypt)
|
||||
block, err := blowfish.NewCipher(key)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
c.block = block
|
||||
c.encbuf = make([]byte, blowfish.BlockSize)
|
||||
c.decbuf = make([]byte, 2*blowfish.BlockSize)
|
||||
return c, nil
|
||||
}
|
||||
|
||||
func (c *blowfishBlockCrypt) Encrypt(dst, src []byte) { encrypt(c.block, dst, src, c.encbuf) }
|
||||
func (c *blowfishBlockCrypt) Decrypt(dst, src []byte) { decrypt(c.block, dst, src, c.decbuf) }
|
||||
|
||||
type aesBlockCrypt struct {
|
||||
encbuf []byte
|
||||
decbuf []byte
|
||||
block cipher.Block
|
||||
}
|
||||
|
||||
// NewAESBlockCrypt https://en.wikipedia.org/wiki/Advanced_Encryption_Standard
|
||||
func NewAESBlockCrypt(key []byte) (BlockCrypt, error) {
|
||||
c := new(aesBlockCrypt)
|
||||
block, err := aes.NewCipher(key)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
c.block = block
|
||||
c.encbuf = make([]byte, aes.BlockSize)
|
||||
c.decbuf = make([]byte, 2*aes.BlockSize)
|
||||
return c, nil
|
||||
}
|
||||
|
||||
func (c *aesBlockCrypt) Encrypt(dst, src []byte) { encrypt(c.block, dst, src, c.encbuf) }
|
||||
func (c *aesBlockCrypt) Decrypt(dst, src []byte) { decrypt(c.block, dst, src, c.decbuf) }
|
||||
|
||||
type teaBlockCrypt struct {
|
||||
encbuf []byte
|
||||
decbuf []byte
|
||||
block cipher.Block
|
||||
}
|
||||
|
||||
// NewTEABlockCrypt https://en.wikipedia.org/wiki/Tiny_Encryption_Algorithm
|
||||
func NewTEABlockCrypt(key []byte) (BlockCrypt, error) {
|
||||
c := new(teaBlockCrypt)
|
||||
block, err := tea.NewCipherWithRounds(key, 16)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
c.block = block
|
||||
c.encbuf = make([]byte, tea.BlockSize)
|
||||
c.decbuf = make([]byte, 2*tea.BlockSize)
|
||||
return c, nil
|
||||
}
|
||||
|
||||
func (c *teaBlockCrypt) Encrypt(dst, src []byte) { encrypt(c.block, dst, src, c.encbuf) }
|
||||
func (c *teaBlockCrypt) Decrypt(dst, src []byte) { decrypt(c.block, dst, src, c.decbuf) }
|
||||
|
||||
type xteaBlockCrypt struct {
|
||||
encbuf []byte
|
||||
decbuf []byte
|
||||
block cipher.Block
|
||||
}
|
||||
|
||||
// NewXTEABlockCrypt https://en.wikipedia.org/wiki/XTEA
|
||||
func NewXTEABlockCrypt(key []byte) (BlockCrypt, error) {
|
||||
c := new(xteaBlockCrypt)
|
||||
block, err := xtea.NewCipher(key)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
c.block = block
|
||||
c.encbuf = make([]byte, xtea.BlockSize)
|
||||
c.decbuf = make([]byte, 2*xtea.BlockSize)
|
||||
return c, nil
|
||||
}
|
||||
|
||||
func (c *xteaBlockCrypt) Encrypt(dst, src []byte) { encrypt(c.block, dst, src, c.encbuf) }
|
||||
func (c *xteaBlockCrypt) Decrypt(dst, src []byte) { decrypt(c.block, dst, src, c.decbuf) }
|
||||
|
||||
type simpleXORBlockCrypt struct {
|
||||
xortbl []byte
|
||||
}
|
||||
|
||||
// NewSimpleXORBlockCrypt simple xor with key expanding
|
||||
func NewSimpleXORBlockCrypt(key []byte) (BlockCrypt, error) {
|
||||
c := new(simpleXORBlockCrypt)
|
||||
c.xortbl = pbkdf2.Key(key, []byte(saltxor), 32, mtuLimit, sha1.New)
|
||||
return c, nil
|
||||
}
|
||||
|
||||
func (c *simpleXORBlockCrypt) Encrypt(dst, src []byte) { xor.Bytes(dst, src, c.xortbl) }
|
||||
func (c *simpleXORBlockCrypt) Decrypt(dst, src []byte) { xor.Bytes(dst, src, c.xortbl) }
|
||||
|
||||
type noneBlockCrypt struct{}
|
||||
|
||||
// NewNoneBlockCrypt does nothing but copying
|
||||
func NewNoneBlockCrypt(key []byte) (BlockCrypt, error) {
|
||||
return new(noneBlockCrypt), nil
|
||||
}
|
||||
|
||||
func (c *noneBlockCrypt) Encrypt(dst, src []byte) { copy(dst, src) }
|
||||
func (c *noneBlockCrypt) Decrypt(dst, src []byte) { copy(dst, src) }
|
||||
|
||||
// packet encryption with local CFB mode
|
||||
func encrypt(block cipher.Block, dst, src, buf []byte) {
|
||||
blocksize := block.BlockSize()
|
||||
tbl := buf[:blocksize]
|
||||
block.Encrypt(tbl, initialVector)
|
||||
n := len(src) / blocksize
|
||||
base := 0
|
||||
for i := 0; i < n; i++ {
|
||||
xor.BytesSrc1(dst[base:], src[base:], tbl)
|
||||
block.Encrypt(tbl, dst[base:])
|
||||
base += blocksize
|
||||
}
|
||||
xor.BytesSrc0(dst[base:], src[base:], tbl)
|
||||
}
|
||||
|
||||
func decrypt(block cipher.Block, dst, src, buf []byte) {
|
||||
blocksize := block.BlockSize()
|
||||
tbl := buf[:blocksize]
|
||||
next := buf[blocksize:]
|
||||
block.Encrypt(tbl, initialVector)
|
||||
n := len(src) / blocksize
|
||||
base := 0
|
||||
for i := 0; i < n; i++ {
|
||||
block.Encrypt(next, src[base:])
|
||||
xor.BytesSrc1(dst[base:], src[base:], tbl)
|
||||
tbl, next = next, tbl
|
||||
base += blocksize
|
||||
}
|
||||
xor.BytesSrc0(dst[base:], src[base:], tbl)
|
||||
}
|
BIN
vendor/github.com/fatedier/kcp-go/donate.png
generated
vendored
Normal file
BIN
vendor/github.com/fatedier/kcp-go/donate.png
generated
vendored
Normal file
Binary file not shown.
After Width: | Height: | Size: 4.3 KiB |
303
vendor/github.com/fatedier/kcp-go/fec.go
generated
vendored
Normal file
303
vendor/github.com/fatedier/kcp-go/fec.go
generated
vendored
Normal file
@ -0,0 +1,303 @@
|
||||
package kcp
|
||||
|
||||
import (
|
||||
"encoding/binary"
|
||||
"sync/atomic"
|
||||
|
||||
"github.com/templexxx/reedsolomon"
|
||||
)
|
||||
|
||||
// Sizes of the FEC packet header.
const (
	// fecHeaderSize covers the 4-byte sequence id and the 2-byte flag that
	// decodeBytes reads from the front of a packet.
	fecHeaderSize = 6
	// fecHeaderSizePlus2 is the header plus 2B data size.
	fecHeaderSizePlus2 = fecHeaderSize + 2
)

// FEC packet type markers.
// NOTE(review): presumably these are the values carried in the header's
// flag field; confirm against the encoder.
const (
	typeData = 0xf1
	typeFEC  = 0xf2
)
|
||||
|
||||
type (
|
||||
// fecPacket is a decoded FEC packet
|
||||
fecPacket struct {
|
||||
seqid uint32
|
||||
flag uint16
|
||||
data []byte
|
||||
}
|
||||
|
||||
// fecDecoder for decoding incoming packets
|
||||
fecDecoder struct {
|
||||
rxlimit int // queue size limit
|
||||
dataShards int
|
||||
parityShards int
|
||||
shardSize int
|
||||
rx []fecPacket // ordered receive queue
|
||||
|
||||
// caches
|
||||
decodeCache [][]byte
|
||||
flagCache []bool
|
||||
|
||||
// RS decoder
|
||||
codec reedsolomon.Encoder
|
||||
}
|
||||
)
|
||||
|
||||
func newFECDecoder(rxlimit, dataShards, parityShards int) *fecDecoder {
|
||||
if dataShards <= 0 || parityShards <= 0 {
|
||||
return nil
|
||||
}
|
||||
if rxlimit < dataShards+parityShards {
|
||||
return nil
|
||||
}
|
||||
|
||||
fec := new(fecDecoder)
|
||||
fec.rxlimit = rxlimit
|
||||
fec.dataShards = dataShards
|
||||
fec.parityShards = parityShards
|
||||
fec.shardSize = dataShards + parityShards
|
||||
enc, err := reedsolomon.New(dataShards, parityShards)
|
||||
if err != nil {
|
||||
return nil
|
||||
}
|
||||
fec.codec = enc
|
||||
fec.decodeCache = make([][]byte, fec.shardSize)
|
||||
fec.flagCache = make([]bool, fec.shardSize)
|
||||
return fec
|
||||
}
|
||||
|
||||
// decodeBytes a fec packet
|
||||
func (dec *fecDecoder) decodeBytes(data []byte) fecPacket {
|
||||
var pkt fecPacket
|
||||
pkt.seqid = binary.LittleEndian.Uint32(data)
|
||||
pkt.flag = binary.LittleEndian.Uint16(data[4:])
|
||||
// allocate memory & copy
|
||||
buf := xmitBuf.Get().([]byte)[:len(data)-6]
|
||||
copy(buf, data[6:])
|
||||
pkt.data = buf
|
||||
return pkt
|
||||
}
|
||||
|
||||
// decode a fec packet
|
||||
func (dec *fecDecoder) decode(pkt fecPacket) (recovered [][]byte) {
|
||||
// insertion
|
||||
n := len(dec.rx) - 1
|
||||
insertIdx := 0
|
||||
for i := n; i >= 0; i-- {
|
||||
if pkt.seqid == dec.rx[i].seqid { // de-duplicate
|
||||
xmitBuf.Put(pkt.data)
|
||||
return nil
|
||||
} else if _itimediff(pkt.seqid, dec.rx[i].seqid) > 0 { // insertion
|
||||
insertIdx = i + 1
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
// insert into ordered rx queue
|
||||
if insertIdx == n+1 {
|
||||
dec.rx = append(dec.rx, pkt)
|
||||
} else {
|
||||
dec.rx = append(dec.rx, fecPacket{})
|
||||
copy(dec.rx[insertIdx+1:], dec.rx[insertIdx:]) // shift right
|
||||
dec.rx[insertIdx] = pkt
|
||||
}
|
||||
|
||||
// shard range for current packet
|
||||
shardBegin := pkt.seqid - pkt.seqid%uint32(dec.shardSize)
|
||||
shardEnd := shardBegin + uint32(dec.shardSize) - 1
|
||||
|
||||
// max search range in ordered queue for current shard
|
||||
searchBegin := insertIdx - int(pkt.seqid%uint32(dec.shardSize))
|
||||
if searchBegin < 0 {
|
||||
searchBegin = 0
|
||||
}
|
||||
searchEnd := searchBegin + dec.shardSize - 1
|
||||
if searchEnd >= len(dec.rx) {
|
||||
searchEnd = len(dec.rx) - 1
|
||||
}
|
||||
|
||||
// re-construct datashards
|
||||
if searchEnd-searchBegin+1 >= dec.dataShards {
|
||||
var numshard, numDataShard, first, maxlen int
|
||||
|
||||
// zero cache
|
||||
shards := dec.decodeCache
|
||||
shardsflag := dec.flagCache
|
||||
for k := range dec.decodeCache {
|
||||
shards[k] = nil
|
||||
shardsflag[k] = false
|
||||
}
|
||||
|
||||
// shard assembly
|
||||
for i := searchBegin; i <= searchEnd; i++ {
|
||||
seqid := dec.rx[i].seqid
|
||||
if _itimediff(seqid, shardEnd) > 0 {
|
||||
break
|
||||
} else if _itimediff(seqid, shardBegin) >= 0 {
|
||||
shards[seqid%uint32(dec.shardSize)] = dec.rx[i].data
|
||||
shardsflag[seqid%uint32(dec.shardSize)] = true
|
||||
numshard++
|
||||
if dec.rx[i].flag == typeData {
|
||||
numDataShard++
|
||||
}
|
||||
if numshard == 1 {
|
||||
first = i
|
||||
}
|
||||
if len(dec.rx[i].data) > maxlen {
|
||||
maxlen = len(dec.rx[i].data)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if numDataShard == dec.dataShards {
|
||||
// case 1: no lost data shards
|
||||
dec.rx = dec.freeRange(first, numshard, dec.rx)
|
||||
} else if numshard >= dec.dataShards {
|
||||
// case 2: data shard lost, but recoverable from parity shard
|
||||
for k := range shards {
|
||||
if shards[k] != nil {
|
||||
dlen := len(shards[k])
|
||||
shards[k] = shards[k][:maxlen]
|
||||
xorBytes(shards[k][dlen:], shards[k][dlen:], shards[k][dlen:])
|
||||
}
|
||||
}
|
||||
if err := dec.codec.ReconstructData(shards); err == nil {
|
||||
for k := range shards[:dec.dataShards] {
|
||||
if !shardsflag[k] {
|
||||
recovered = append(recovered, shards[k])
|
||||
}
|
||||
}
|
||||
}
|
||||
dec.rx = dec.freeRange(first, numshard, dec.rx)
|
||||
}
|
||||
}
|
||||
|
||||
// keep rxlimit
|
||||
if len(dec.rx) > dec.rxlimit {
|
||||
if dec.rx[0].flag == typeData { // record unrecoverable data
|
||||
atomic.AddUint64(&DefaultSnmp.FECShortShards, 1)
|
||||
}
|
||||
dec.rx = dec.freeRange(0, 1, dec.rx)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// free a range of fecPacket, and zero for GC recycling
|
||||
func (dec *fecDecoder) freeRange(first, n int, q []fecPacket) []fecPacket {
|
||||
for i := first; i < first+n; i++ { // free
|
||||
xmitBuf.Put(q[i].data)
|
||||
}
|
||||
copy(q[first:], q[first+n:])
|
||||
for i := 0; i < n; i++ { // dereference data
|
||||
q[len(q)-1-i].data = nil
|
||||
}
|
||||
return q[:len(q)-n]
|
||||
}
|
||||
|
||||
type (
|
||||
// fecEncoder for encoding outgoing packets
|
||||
fecEncoder struct {
|
||||
dataShards int
|
||||
parityShards int
|
||||
shardSize int
|
||||
paws uint32 // Protect Against Wrapped Sequence numbers
|
||||
next uint32 // next seqid
|
||||
|
||||
shardCount int // count the number of datashards collected
|
||||
maxSize int // record maximum data length in datashard
|
||||
|
||||
headerOffset int // FEC header offset
|
||||
payloadOffset int // FEC payload offset
|
||||
|
||||
// caches
|
||||
shardCache [][]byte
|
||||
encodeCache [][]byte
|
||||
|
||||
// RS encoder
|
||||
codec reedsolomon.Encoder
|
||||
}
|
||||
)
|
||||
|
||||
func newFECEncoder(dataShards, parityShards, offset int) *fecEncoder {
|
||||
if dataShards <= 0 || parityShards <= 0 {
|
||||
return nil
|
||||
}
|
||||
fec := new(fecEncoder)
|
||||
fec.dataShards = dataShards
|
||||
fec.parityShards = parityShards
|
||||
fec.shardSize = dataShards + parityShards
|
||||
fec.paws = (0xffffffff/uint32(fec.shardSize) - 1) * uint32(fec.shardSize)
|
||||
fec.headerOffset = offset
|
||||
fec.payloadOffset = fec.headerOffset + fecHeaderSize
|
||||
|
||||
enc, err := reedsolomon.New(dataShards, parityShards)
|
||||
if err != nil {
|
||||
return nil
|
||||
}
|
||||
fec.codec = enc
|
||||
|
||||
// caches
|
||||
fec.encodeCache = make([][]byte, fec.shardSize)
|
||||
fec.shardCache = make([][]byte, fec.shardSize)
|
||||
for k := range fec.shardCache {
|
||||
fec.shardCache[k] = make([]byte, mtuLimit)
|
||||
}
|
||||
return fec
|
||||
}
|
||||
|
||||
// encode the packet, output parity shards if we have enough datashards
|
||||
// the content of returned parityshards will change in next encode
|
||||
func (enc *fecEncoder) encode(b []byte) (ps [][]byte) {
|
||||
enc.markData(b[enc.headerOffset:])
|
||||
binary.LittleEndian.PutUint16(b[enc.payloadOffset:], uint16(len(b[enc.payloadOffset:])))
|
||||
|
||||
// copy data to fec datashards
|
||||
sz := len(b)
|
||||
enc.shardCache[enc.shardCount] = enc.shardCache[enc.shardCount][:sz]
|
||||
copy(enc.shardCache[enc.shardCount], b)
|
||||
enc.shardCount++
|
||||
|
||||
// record max datashard length
|
||||
if sz > enc.maxSize {
|
||||
enc.maxSize = sz
|
||||
}
|
||||
|
||||
// calculate Reed-Solomon Erasure Code
|
||||
if enc.shardCount == enc.dataShards {
|
||||
// bzero each datashard's tail
|
||||
for i := 0; i < enc.dataShards; i++ {
|
||||
shard := enc.shardCache[i]
|
||||
slen := len(shard)
|
||||
xorBytes(shard[slen:enc.maxSize], shard[slen:enc.maxSize], shard[slen:enc.maxSize])
|
||||
}
|
||||
|
||||
// construct equal-sized slice with stripped header
|
||||
cache := enc.encodeCache
|
||||
for k := range cache {
|
||||
cache[k] = enc.shardCache[k][enc.payloadOffset:enc.maxSize]
|
||||
}
|
||||
|
||||
// rs encode
|
||||
if err := enc.codec.Encode(cache); err == nil {
|
||||
ps = enc.shardCache[enc.dataShards:]
|
||||
for k := range ps {
|
||||
enc.markFEC(ps[k][enc.headerOffset:])
|
||||
ps[k] = ps[k][:enc.maxSize]
|
||||
}
|
||||
}
|
||||
|
||||
// reset counters to zero
|
||||
enc.shardCount = 0
|
||||
enc.maxSize = 0
|
||||
}
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
func (enc *fecEncoder) markData(data []byte) {
|
||||
binary.LittleEndian.PutUint32(data, enc.next)
|
||||
binary.LittleEndian.PutUint16(data[4:], typeData)
|
||||
enc.next++
|
||||
}
|
||||
|
||||
func (enc *fecEncoder) markFEC(data []byte) {
|
||||
binary.LittleEndian.PutUint32(data, enc.next)
|
||||
binary.LittleEndian.PutUint16(data[4:], typeFEC)
|
||||
enc.next = (enc.next + 1) % enc.paws
|
||||
}
|
BIN
vendor/github.com/fatedier/kcp-go/frame.png
generated
vendored
Normal file
BIN
vendor/github.com/fatedier/kcp-go/frame.png
generated
vendored
Normal file
Binary file not shown.
After Width: | Height: | Size: 35 KiB |
BIN
vendor/github.com/fatedier/kcp-go/kcp-go.png
generated
vendored
Normal file
BIN
vendor/github.com/fatedier/kcp-go/kcp-go.png
generated
vendored
Normal file
Binary file not shown.
After Width: | Height: | Size: 8.9 KiB |
998
vendor/github.com/fatedier/kcp-go/kcp.go
generated
vendored
Normal file
998
vendor/github.com/fatedier/kcp-go/kcp.go
generated
vendored
Normal file
@ -0,0 +1,998 @@
|
||||
// Package kcp - A Fast and Reliable ARQ Protocol
|
||||
package kcp
|
||||
|
||||
import (
|
||||
"encoding/binary"
|
||||
"sync/atomic"
|
||||
)
|
||||
|
||||
// Retransmission timeout bounds, in milliseconds.
const (
	IKCP_RTO_NDL = 30 // no delay min rto
	IKCP_RTO_MIN = 100 // normal min rto
	IKCP_RTO_DEF = 200
	IKCP_RTO_MAX = 60000
)

// Segment commands carried in the cmd header field.
const (
	IKCP_CMD_PUSH = 81 // cmd: push data
	IKCP_CMD_ACK  = 82 // cmd: ack
	IKCP_CMD_WASK = 83 // cmd: window probe (ask)
	IKCP_CMD_WINS = 84 // cmd: window size (tell)
)

// Bits of KCP.probe.
const (
	IKCP_ASK_SEND = 1 // need to send IKCP_CMD_WASK
	IKCP_ASK_TELL = 2 // need to send IKCP_CMD_WINS
)

// Defaults and limits.
const (
	IKCP_WND_SND     = 32
	IKCP_WND_RCV     = 32
	IKCP_MTU_DEF     = 1400
	IKCP_ACK_FAST    = 3
	IKCP_INTERVAL    = 100
	IKCP_OVERHEAD    = 24 // encoded segment header size in bytes
	IKCP_DEADLINK    = 20
	IKCP_THRESH_INIT = 2
	IKCP_THRESH_MIN  = 2
	IKCP_PROBE_INIT  = 7000   // 7 secs to probe window size
	IKCP_PROBE_LIMIT = 120000 // up to 120 secs to probe window
)
|
||||
|
||||
// output_callback is the function a KCP instance calls to emit encoded
// packets: flush invokes it with its packet buffer and the number of bytes in
// use, so the bytes to send are buf[:size]. It is expected to capture a
// connection and write those bytes to it.
|
||||
type output_callback func(buf []byte, size int)
|
||||
|
||||
// ikcp_encode8u stores c in the first byte of p and returns the remainder of
// p after that byte.
func ikcp_encode8u(p []byte, c byte) []byte {
	const width = 1
	p[0] = c
	return p[width:]
}
|
||||
|
||||
// ikcp_decode8u reads the first byte of p into *c and returns the remainder
// of p after that byte.
func ikcp_decode8u(p []byte, c *byte) []byte {
	const width = 1
	*c = p[0]
	return p[width:]
}
|
||||
|
||||
// ikcp_encode16u stores w little-endian (least significant byte first) in the
// first two bytes of p and returns the remainder of p.
func ikcp_encode16u(p []byte, w uint16) []byte {
	const width = 2
	binary.LittleEndian.PutUint16(p, w)
	return p[width:]
}
|
||||
|
||||
// ikcp_decode16u reads a little-endian uint16 from the first two bytes of p
// into *w and returns the remainder of p.
func ikcp_decode16u(p []byte, w *uint16) []byte {
	const width = 2
	*w = binary.LittleEndian.Uint16(p)
	return p[width:]
}
|
||||
|
||||
// ikcp_encode32u stores l little-endian (least significant byte first) in the
// first four bytes of p and returns the remainder of p.
func ikcp_encode32u(p []byte, l uint32) []byte {
	const width = 4
	binary.LittleEndian.PutUint32(p, l)
	return p[width:]
}
|
||||
|
||||
// ikcp_decode32u reads a little-endian uint32 from the first four bytes of p
// into *l and returns the remainder of p.
func ikcp_decode32u(p []byte, l *uint32) []byte {
	const width = 4
	*l = binary.LittleEndian.Uint32(p)
	return p[width:]
}
|
||||
|
||||
// _imin_ returns the smaller of a and b.
func _imin_(a, b uint32) uint32 {
	if b < a {
		return b
	}
	return a
}

// _imax_ returns the larger of a and b.
func _imax_(a, b uint32) uint32 {
	if b > a {
		return b
	}
	return a
}

// _ibound_ clamps middle to the range [lower, upper]: it first raises middle
// to at least lower, then caps the result at upper.
func _ibound_(lower, middle, upper uint32) uint32 {
	raised := _imax_(lower, middle)
	return _imin_(raised, upper)
}
|
||||
|
||||
// _itimediff returns later-earlier as a signed 32-bit value. The subtraction
// is done in uint32 first, so the result stays meaningful when the counters
// have wrapped around.
func _itimediff(later, earlier uint32) int32 {
	delta := later - earlier
	return int32(delta)
}
|
||||
|
||||
// segment defines a KCP segment. The fields conv through una, plus the
// length of data, are what encode writes into the wire header; the remaining
// fields are sender-side retransmission bookkeeping.
type segment struct {
	conv     uint32 // conversation id
	cmd      uint8  // one of the IKCP_CMD_* values
	frg      uint8  // fragments still to follow in this message (0 = last)
	wnd      uint16 // sender's advertised free receive window
	ts       uint32 // timestamp of the transmission
	sn       uint32 // sequence number
	una      uint32 // sender's next expected receive sequence number
	rto      uint32 // retransmission timeout for this segment
	xmit     uint32 // number of times the segment has been transmitted
	resendts uint32 // time at which the segment is due for retransmission
	fastack  uint32 // times this segment was skipped by a later ack
	data     []byte // payload
}
|
||||
|
||||
// encode a segment into buffer
|
||||
func (seg *segment) encode(ptr []byte) []byte {
|
||||
ptr = ikcp_encode32u(ptr, seg.conv)
|
||||
ptr = ikcp_encode8u(ptr, seg.cmd)
|
||||
ptr = ikcp_encode8u(ptr, seg.frg)
|
||||
ptr = ikcp_encode16u(ptr, seg.wnd)
|
||||
ptr = ikcp_encode32u(ptr, seg.ts)
|
||||
ptr = ikcp_encode32u(ptr, seg.sn)
|
||||
ptr = ikcp_encode32u(ptr, seg.una)
|
||||
ptr = ikcp_encode32u(ptr, uint32(len(seg.data)))
|
||||
atomic.AddUint64(&DefaultSnmp.OutSegs, 1)
|
||||
return ptr
|
||||
}
|
||||
|
||||
// KCP defines a single KCP connection
|
||||
type KCP struct {
|
||||
conv, mtu, mss, state uint32
|
||||
snd_una, snd_nxt, rcv_nxt uint32
|
||||
ssthresh uint32
|
||||
rx_rttvar, rx_srtt int32
|
||||
rx_rto, rx_minrto uint32
|
||||
snd_wnd, rcv_wnd, rmt_wnd, cwnd, probe uint32
|
||||
interval, ts_flush uint32
|
||||
nodelay, updated uint32
|
||||
ts_probe, probe_wait uint32
|
||||
dead_link, incr uint32
|
||||
|
||||
fastresend int32
|
||||
nocwnd, stream int32
|
||||
|
||||
snd_queue []segment
|
||||
rcv_queue []segment
|
||||
snd_buf []segment
|
||||
rcv_buf []segment
|
||||
|
||||
acklist []ackItem
|
||||
|
||||
buffer []byte
|
||||
output output_callback
|
||||
}
|
||||
|
||||
// ackItem is one pending acknowledgement: the sequence number being acked and
// the timestamp carried by the segment that triggered it.
type ackItem struct {
	sn uint32 // sequence number to acknowledge
	ts uint32 // timestamp echoed back to the sender
}
|
||||
|
||||
// NewKCP create a new kcp control object, 'conv' must equal in two endpoint
|
||||
// from the same connection.
|
||||
func NewKCP(conv uint32, output output_callback) *KCP {
|
||||
kcp := new(KCP)
|
||||
kcp.conv = conv
|
||||
kcp.snd_wnd = IKCP_WND_SND
|
||||
kcp.rcv_wnd = IKCP_WND_RCV
|
||||
kcp.rmt_wnd = IKCP_WND_RCV
|
||||
kcp.mtu = IKCP_MTU_DEF
|
||||
kcp.mss = kcp.mtu - IKCP_OVERHEAD
|
||||
kcp.buffer = make([]byte, (kcp.mtu+IKCP_OVERHEAD)*3)
|
||||
kcp.rx_rto = IKCP_RTO_DEF
|
||||
kcp.rx_minrto = IKCP_RTO_MIN
|
||||
kcp.interval = IKCP_INTERVAL
|
||||
kcp.ts_flush = IKCP_INTERVAL
|
||||
kcp.ssthresh = IKCP_THRESH_INIT
|
||||
kcp.dead_link = IKCP_DEADLINK
|
||||
kcp.output = output
|
||||
return kcp
|
||||
}
|
||||
|
||||
// newSegment creates a KCP segment
|
||||
func (kcp *KCP) newSegment(size int) (seg segment) {
|
||||
seg.data = xmitBuf.Get().([]byte)[:size]
|
||||
return
|
||||
}
|
||||
|
||||
// delSegment recycles a KCP segment
|
||||
func (kcp *KCP) delSegment(seg segment) {
|
||||
xmitBuf.Put(seg.data)
|
||||
}
|
||||
|
||||
// PeekSize checks the size of next message in the recv queue
|
||||
func (kcp *KCP) PeekSize() (length int) {
|
||||
if len(kcp.rcv_queue) == 0 {
|
||||
return -1
|
||||
}
|
||||
|
||||
seg := &kcp.rcv_queue[0]
|
||||
if seg.frg == 0 {
|
||||
return len(seg.data)
|
||||
}
|
||||
|
||||
if len(kcp.rcv_queue) < int(seg.frg+1) {
|
||||
return -1
|
||||
}
|
||||
|
||||
for k := range kcp.rcv_queue {
|
||||
seg := &kcp.rcv_queue[k]
|
||||
length += len(seg.data)
|
||||
if seg.frg == 0 {
|
||||
break
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// Recv is user/upper level recv: returns size, returns below zero for EAGAIN
|
||||
func (kcp *KCP) Recv(buffer []byte) (n int) {
|
||||
if len(kcp.rcv_queue) == 0 {
|
||||
return -1
|
||||
}
|
||||
|
||||
peeksize := kcp.PeekSize()
|
||||
if peeksize < 0 {
|
||||
return -2
|
||||
}
|
||||
|
||||
if peeksize > len(buffer) {
|
||||
return -3
|
||||
}
|
||||
|
||||
var fast_recover bool
|
||||
if len(kcp.rcv_queue) >= int(kcp.rcv_wnd) {
|
||||
fast_recover = true
|
||||
}
|
||||
|
||||
// merge fragment
|
||||
count := 0
|
||||
for k := range kcp.rcv_queue {
|
||||
seg := &kcp.rcv_queue[k]
|
||||
copy(buffer, seg.data)
|
||||
buffer = buffer[len(seg.data):]
|
||||
n += len(seg.data)
|
||||
count++
|
||||
kcp.delSegment(*seg)
|
||||
if seg.frg == 0 {
|
||||
break
|
||||
}
|
||||
}
|
||||
if count > 0 {
|
||||
kcp.rcv_queue = kcp.remove_front(kcp.rcv_queue, count)
|
||||
}
|
||||
|
||||
// move available data from rcv_buf -> rcv_queue
|
||||
count = 0
|
||||
for k := range kcp.rcv_buf {
|
||||
seg := &kcp.rcv_buf[k]
|
||||
if seg.sn == kcp.rcv_nxt && len(kcp.rcv_queue) < int(kcp.rcv_wnd) {
|
||||
kcp.rcv_nxt++
|
||||
count++
|
||||
} else {
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
if count > 0 {
|
||||
kcp.rcv_queue = append(kcp.rcv_queue, kcp.rcv_buf[:count]...)
|
||||
kcp.rcv_buf = kcp.remove_front(kcp.rcv_buf, count)
|
||||
}
|
||||
|
||||
// fast recover
|
||||
if len(kcp.rcv_queue) < int(kcp.rcv_wnd) && fast_recover {
|
||||
// ready to send back IKCP_CMD_WINS in ikcp_flush
|
||||
// tell remote my window size
|
||||
kcp.probe |= IKCP_ASK_TELL
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// Send is user/upper level send, returns below zero for error
|
||||
func (kcp *KCP) Send(buffer []byte) int {
|
||||
var count int
|
||||
if len(buffer) == 0 {
|
||||
return -1
|
||||
}
|
||||
|
||||
// append to previous segment in streaming mode (if possible)
|
||||
if kcp.stream != 0 {
|
||||
n := len(kcp.snd_queue)
|
||||
if n > 0 {
|
||||
seg := &kcp.snd_queue[n-1]
|
||||
if len(seg.data) < int(kcp.mss) {
|
||||
capacity := int(kcp.mss) - len(seg.data)
|
||||
extend := capacity
|
||||
if len(buffer) < capacity {
|
||||
extend = len(buffer)
|
||||
}
|
||||
|
||||
// grow slice, the underlying cap is guaranteed to
|
||||
// be larger than kcp.mss
|
||||
oldlen := len(seg.data)
|
||||
seg.data = seg.data[:oldlen+extend]
|
||||
copy(seg.data[oldlen:], buffer)
|
||||
buffer = buffer[extend:]
|
||||
}
|
||||
}
|
||||
|
||||
if len(buffer) == 0 {
|
||||
return 0
|
||||
}
|
||||
}
|
||||
|
||||
if len(buffer) <= int(kcp.mss) {
|
||||
count = 1
|
||||
} else {
|
||||
count = (len(buffer) + int(kcp.mss) - 1) / int(kcp.mss)
|
||||
}
|
||||
|
||||
if count > 255 {
|
||||
return -2
|
||||
}
|
||||
|
||||
if count == 0 {
|
||||
count = 1
|
||||
}
|
||||
|
||||
for i := 0; i < count; i++ {
|
||||
var size int
|
||||
if len(buffer) > int(kcp.mss) {
|
||||
size = int(kcp.mss)
|
||||
} else {
|
||||
size = len(buffer)
|
||||
}
|
||||
seg := kcp.newSegment(size)
|
||||
copy(seg.data, buffer[:size])
|
||||
if kcp.stream == 0 { // message mode
|
||||
seg.frg = uint8(count - i - 1)
|
||||
} else { // stream mode
|
||||
seg.frg = 0
|
||||
}
|
||||
kcp.snd_queue = append(kcp.snd_queue, seg)
|
||||
buffer = buffer[size:]
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
func (kcp *KCP) update_ack(rtt int32) {
|
||||
// https://tools.ietf.org/html/rfc6298
|
||||
var rto uint32
|
||||
if kcp.rx_srtt == 0 {
|
||||
kcp.rx_srtt = rtt
|
||||
kcp.rx_rttvar = rtt >> 1
|
||||
} else {
|
||||
delta := rtt - kcp.rx_srtt
|
||||
kcp.rx_srtt += delta >> 3
|
||||
if delta < 0 {
|
||||
delta = -delta
|
||||
}
|
||||
if rtt < kcp.rx_srtt-kcp.rx_rttvar {
|
||||
// if the new RTT sample is below the bottom of the range of
|
||||
// what an RTT measurement is expected to be.
|
||||
// give an 8x reduced weight versus its normal weighting
|
||||
kcp.rx_rttvar += (delta - kcp.rx_rttvar) >> 5
|
||||
} else {
|
||||
kcp.rx_rttvar += (delta - kcp.rx_rttvar) >> 2
|
||||
}
|
||||
}
|
||||
rto = uint32(kcp.rx_srtt) + _imax_(kcp.interval, uint32(kcp.rx_rttvar)<<2)
|
||||
kcp.rx_rto = _ibound_(kcp.rx_minrto, rto, IKCP_RTO_MAX)
|
||||
}
|
||||
|
||||
func (kcp *KCP) shrink_buf() {
|
||||
if len(kcp.snd_buf) > 0 {
|
||||
seg := &kcp.snd_buf[0]
|
||||
kcp.snd_una = seg.sn
|
||||
} else {
|
||||
kcp.snd_una = kcp.snd_nxt
|
||||
}
|
||||
}
|
||||
|
||||
func (kcp *KCP) parse_ack(sn uint32) {
|
||||
if _itimediff(sn, kcp.snd_una) < 0 || _itimediff(sn, kcp.snd_nxt) >= 0 {
|
||||
return
|
||||
}
|
||||
|
||||
for k := range kcp.snd_buf {
|
||||
seg := &kcp.snd_buf[k]
|
||||
if sn == seg.sn {
|
||||
kcp.delSegment(*seg)
|
||||
copy(kcp.snd_buf[k:], kcp.snd_buf[k+1:])
|
||||
kcp.snd_buf[len(kcp.snd_buf)-1] = segment{}
|
||||
kcp.snd_buf = kcp.snd_buf[:len(kcp.snd_buf)-1]
|
||||
break
|
||||
}
|
||||
if _itimediff(sn, seg.sn) < 0 {
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (kcp *KCP) parse_fastack(sn uint32) {
|
||||
if _itimediff(sn, kcp.snd_una) < 0 || _itimediff(sn, kcp.snd_nxt) >= 0 {
|
||||
return
|
||||
}
|
||||
|
||||
for k := range kcp.snd_buf {
|
||||
seg := &kcp.snd_buf[k]
|
||||
if _itimediff(sn, seg.sn) < 0 {
|
||||
break
|
||||
} else if sn != seg.sn {
|
||||
seg.fastack++
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (kcp *KCP) parse_una(una uint32) {
|
||||
count := 0
|
||||
for k := range kcp.snd_buf {
|
||||
seg := &kcp.snd_buf[k]
|
||||
if _itimediff(una, seg.sn) > 0 {
|
||||
kcp.delSegment(*seg)
|
||||
count++
|
||||
} else {
|
||||
break
|
||||
}
|
||||
}
|
||||
if count > 0 {
|
||||
kcp.snd_buf = kcp.remove_front(kcp.snd_buf, count)
|
||||
}
|
||||
}
|
||||
|
||||
// ack append
|
||||
func (kcp *KCP) ack_push(sn, ts uint32) {
|
||||
kcp.acklist = append(kcp.acklist, ackItem{sn, ts})
|
||||
}
|
||||
|
||||
func (kcp *KCP) parse_data(newseg segment) {
|
||||
sn := newseg.sn
|
||||
if _itimediff(sn, kcp.rcv_nxt+kcp.rcv_wnd) >= 0 ||
|
||||
_itimediff(sn, kcp.rcv_nxt) < 0 {
|
||||
kcp.delSegment(newseg)
|
||||
return
|
||||
}
|
||||
|
||||
n := len(kcp.rcv_buf) - 1
|
||||
insert_idx := 0
|
||||
repeat := false
|
||||
for i := n; i >= 0; i-- {
|
||||
seg := &kcp.rcv_buf[i]
|
||||
if seg.sn == sn {
|
||||
repeat = true
|
||||
atomic.AddUint64(&DefaultSnmp.RepeatSegs, 1)
|
||||
break
|
||||
}
|
||||
if _itimediff(sn, seg.sn) > 0 {
|
||||
insert_idx = i + 1
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
if !repeat {
|
||||
if insert_idx == n+1 {
|
||||
kcp.rcv_buf = append(kcp.rcv_buf, newseg)
|
||||
} else {
|
||||
kcp.rcv_buf = append(kcp.rcv_buf, segment{})
|
||||
copy(kcp.rcv_buf[insert_idx+1:], kcp.rcv_buf[insert_idx:])
|
||||
kcp.rcv_buf[insert_idx] = newseg
|
||||
}
|
||||
} else {
|
||||
kcp.delSegment(newseg)
|
||||
}
|
||||
|
||||
// move available data from rcv_buf -> rcv_queue
|
||||
count := 0
|
||||
for k := range kcp.rcv_buf {
|
||||
seg := &kcp.rcv_buf[k]
|
||||
if seg.sn == kcp.rcv_nxt && len(kcp.rcv_queue) < int(kcp.rcv_wnd) {
|
||||
kcp.rcv_nxt++
|
||||
count++
|
||||
} else {
|
||||
break
|
||||
}
|
||||
}
|
||||
if count > 0 {
|
||||
kcp.rcv_queue = append(kcp.rcv_queue, kcp.rcv_buf[:count]...)
|
||||
kcp.rcv_buf = kcp.remove_front(kcp.rcv_buf, count)
|
||||
}
|
||||
}
|
||||
|
||||
// Input when you received a low level packet (eg. UDP packet), call it
|
||||
// regular indicates a regular packet has received(not from FEC)
|
||||
func (kcp *KCP) Input(data []byte, regular, ackNoDelay bool) int {
|
||||
una := kcp.snd_una
|
||||
if len(data) < IKCP_OVERHEAD {
|
||||
return -1
|
||||
}
|
||||
|
||||
var maxack uint32
|
||||
var lastackts uint32
|
||||
var flag int
|
||||
var inSegs uint64
|
||||
|
||||
for {
|
||||
var ts, sn, length, una, conv uint32
|
||||
var wnd uint16
|
||||
var cmd, frg uint8
|
||||
|
||||
if len(data) < int(IKCP_OVERHEAD) {
|
||||
break
|
||||
}
|
||||
|
||||
data = ikcp_decode32u(data, &conv)
|
||||
if conv != kcp.conv {
|
||||
return -1
|
||||
}
|
||||
|
||||
data = ikcp_decode8u(data, &cmd)
|
||||
data = ikcp_decode8u(data, &frg)
|
||||
data = ikcp_decode16u(data, &wnd)
|
||||
data = ikcp_decode32u(data, &ts)
|
||||
data = ikcp_decode32u(data, &sn)
|
||||
data = ikcp_decode32u(data, &una)
|
||||
data = ikcp_decode32u(data, &length)
|
||||
if len(data) < int(length) {
|
||||
return -2
|
||||
}
|
||||
|
||||
if cmd != IKCP_CMD_PUSH && cmd != IKCP_CMD_ACK &&
|
||||
cmd != IKCP_CMD_WASK && cmd != IKCP_CMD_WINS {
|
||||
return -3
|
||||
}
|
||||
|
||||
// only trust window updates from regular packets. i.e: latest update
|
||||
if regular {
|
||||
kcp.rmt_wnd = uint32(wnd)
|
||||
}
|
||||
kcp.parse_una(una)
|
||||
kcp.shrink_buf()
|
||||
|
||||
if cmd == IKCP_CMD_ACK {
|
||||
kcp.parse_ack(sn)
|
||||
kcp.shrink_buf()
|
||||
if flag == 0 {
|
||||
flag = 1
|
||||
maxack = sn
|
||||
} else if _itimediff(sn, maxack) > 0 {
|
||||
maxack = sn
|
||||
}
|
||||
lastackts = ts
|
||||
} else if cmd == IKCP_CMD_PUSH {
|
||||
if _itimediff(sn, kcp.rcv_nxt+kcp.rcv_wnd) < 0 {
|
||||
kcp.ack_push(sn, ts)
|
||||
if _itimediff(sn, kcp.rcv_nxt) >= 0 {
|
||||
seg := kcp.newSegment(int(length))
|
||||
seg.conv = conv
|
||||
seg.cmd = cmd
|
||||
seg.frg = frg
|
||||
seg.wnd = wnd
|
||||
seg.ts = ts
|
||||
seg.sn = sn
|
||||
seg.una = una
|
||||
copy(seg.data, data[:length])
|
||||
kcp.parse_data(seg)
|
||||
} else {
|
||||
atomic.AddUint64(&DefaultSnmp.RepeatSegs, 1)
|
||||
}
|
||||
} else {
|
||||
atomic.AddUint64(&DefaultSnmp.RepeatSegs, 1)
|
||||
}
|
||||
} else if cmd == IKCP_CMD_WASK {
|
||||
// ready to send back IKCP_CMD_WINS in Ikcp_flush
|
||||
// tell remote my window size
|
||||
kcp.probe |= IKCP_ASK_TELL
|
||||
} else if cmd == IKCP_CMD_WINS {
|
||||
// do nothing
|
||||
} else {
|
||||
return -3
|
||||
}
|
||||
|
||||
inSegs++
|
||||
data = data[length:]
|
||||
}
|
||||
atomic.AddUint64(&DefaultSnmp.InSegs, inSegs)
|
||||
|
||||
if flag != 0 && regular {
|
||||
kcp.parse_fastack(maxack)
|
||||
current := currentMs()
|
||||
if _itimediff(current, lastackts) >= 0 {
|
||||
kcp.update_ack(_itimediff(current, lastackts))
|
||||
}
|
||||
}
|
||||
|
||||
if _itimediff(kcp.snd_una, una) > 0 {
|
||||
if kcp.cwnd < kcp.rmt_wnd {
|
||||
mss := kcp.mss
|
||||
if kcp.cwnd < kcp.ssthresh {
|
||||
kcp.cwnd++
|
||||
kcp.incr += mss
|
||||
} else {
|
||||
if kcp.incr < mss {
|
||||
kcp.incr = mss
|
||||
}
|
||||
kcp.incr += (mss*mss)/kcp.incr + (mss / 16)
|
||||
if (kcp.cwnd+1)*mss <= kcp.incr {
|
||||
kcp.cwnd++
|
||||
}
|
||||
}
|
||||
if kcp.cwnd > kcp.rmt_wnd {
|
||||
kcp.cwnd = kcp.rmt_wnd
|
||||
kcp.incr = kcp.rmt_wnd * mss
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if ackNoDelay && len(kcp.acklist) > 0 { // ack immediately
|
||||
kcp.flush(true)
|
||||
} else if kcp.rmt_wnd == 0 && len(kcp.acklist) > 0 { // window zero
|
||||
kcp.flush(true)
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
func (kcp *KCP) wnd_unused() uint16 {
|
||||
if len(kcp.rcv_queue) < int(kcp.rcv_wnd) {
|
||||
return uint16(int(kcp.rcv_wnd) - len(kcp.rcv_queue))
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
// flush pending data
|
||||
func (kcp *KCP) flush(ackOnly bool) {
|
||||
var seg segment
|
||||
seg.conv = kcp.conv
|
||||
seg.cmd = IKCP_CMD_ACK
|
||||
seg.wnd = kcp.wnd_unused()
|
||||
seg.una = kcp.rcv_nxt
|
||||
|
||||
buffer := kcp.buffer
|
||||
// flush acknowledges
|
||||
ptr := buffer
|
||||
for i, ack := range kcp.acklist {
|
||||
size := len(buffer) - len(ptr)
|
||||
if size+IKCP_OVERHEAD > int(kcp.mtu) {
|
||||
kcp.output(buffer, size)
|
||||
ptr = buffer
|
||||
}
|
||||
// filter jitters caused by bufferbloat
|
||||
if ack.sn >= kcp.rcv_nxt || len(kcp.acklist)-1 == i {
|
||||
seg.sn, seg.ts = ack.sn, ack.ts
|
||||
ptr = seg.encode(ptr)
|
||||
}
|
||||
}
|
||||
kcp.acklist = kcp.acklist[0:0]
|
||||
|
||||
if ackOnly { // flash remain ack segments
|
||||
size := len(buffer) - len(ptr)
|
||||
if size > 0 {
|
||||
kcp.output(buffer, size)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// probe window size (if remote window size equals zero)
|
||||
if kcp.rmt_wnd == 0 {
|
||||
current := currentMs()
|
||||
if kcp.probe_wait == 0 {
|
||||
kcp.probe_wait = IKCP_PROBE_INIT
|
||||
kcp.ts_probe = current + kcp.probe_wait
|
||||
} else {
|
||||
if _itimediff(current, kcp.ts_probe) >= 0 {
|
||||
if kcp.probe_wait < IKCP_PROBE_INIT {
|
||||
kcp.probe_wait = IKCP_PROBE_INIT
|
||||
}
|
||||
kcp.probe_wait += kcp.probe_wait / 2
|
||||
if kcp.probe_wait > IKCP_PROBE_LIMIT {
|
||||
kcp.probe_wait = IKCP_PROBE_LIMIT
|
||||
}
|
||||
kcp.ts_probe = current + kcp.probe_wait
|
||||
kcp.probe |= IKCP_ASK_SEND
|
||||
}
|
||||
}
|
||||
} else {
|
||||
kcp.ts_probe = 0
|
||||
kcp.probe_wait = 0
|
||||
}
|
||||
|
||||
// flush window probing commands
|
||||
if (kcp.probe & IKCP_ASK_SEND) != 0 {
|
||||
seg.cmd = IKCP_CMD_WASK
|
||||
size := len(buffer) - len(ptr)
|
||||
if size+IKCP_OVERHEAD > int(kcp.mtu) {
|
||||
kcp.output(buffer, size)
|
||||
ptr = buffer
|
||||
}
|
||||
ptr = seg.encode(ptr)
|
||||
}
|
||||
|
||||
// flush window probing commands
|
||||
if (kcp.probe & IKCP_ASK_TELL) != 0 {
|
||||
seg.cmd = IKCP_CMD_WINS
|
||||
size := len(buffer) - len(ptr)
|
||||
if size+IKCP_OVERHEAD > int(kcp.mtu) {
|
||||
kcp.output(buffer, size)
|
||||
ptr = buffer
|
||||
}
|
||||
ptr = seg.encode(ptr)
|
||||
}
|
||||
|
||||
kcp.probe = 0
|
||||
|
||||
// calculate window size
|
||||
cwnd := _imin_(kcp.snd_wnd, kcp.rmt_wnd)
|
||||
if kcp.nocwnd == 0 {
|
||||
cwnd = _imin_(kcp.cwnd, cwnd)
|
||||
}
|
||||
|
||||
// sliding window, controlled by snd_nxt && sna_una+cwnd
|
||||
newSegsCount := 0
|
||||
for k := range kcp.snd_queue {
|
||||
if _itimediff(kcp.snd_nxt, kcp.snd_una+cwnd) >= 0 {
|
||||
break
|
||||
}
|
||||
newseg := kcp.snd_queue[k]
|
||||
newseg.conv = kcp.conv
|
||||
newseg.cmd = IKCP_CMD_PUSH
|
||||
newseg.sn = kcp.snd_nxt
|
||||
kcp.snd_buf = append(kcp.snd_buf, newseg)
|
||||
kcp.snd_nxt++
|
||||
newSegsCount++
|
||||
kcp.snd_queue[k].data = nil
|
||||
}
|
||||
if newSegsCount > 0 {
|
||||
kcp.snd_queue = kcp.remove_front(kcp.snd_queue, newSegsCount)
|
||||
}
|
||||
|
||||
// calculate resent
|
||||
resent := uint32(kcp.fastresend)
|
||||
if kcp.fastresend <= 0 {
|
||||
resent = 0xffffffff
|
||||
}
|
||||
|
||||
// check for retransmissions
|
||||
current := currentMs()
|
||||
var change, lost, lostSegs, fastRetransSegs, earlyRetransSegs uint64
|
||||
for k := range kcp.snd_buf {
|
||||
segment := &kcp.snd_buf[k]
|
||||
needsend := false
|
||||
if segment.xmit == 0 { // initial transmit
|
||||
needsend = true
|
||||
segment.rto = kcp.rx_rto
|
||||
segment.resendts = current + segment.rto
|
||||
} else if _itimediff(current, segment.resendts) >= 0 { // RTO
|
||||
needsend = true
|
||||
if kcp.nodelay == 0 {
|
||||
segment.rto += kcp.rx_rto
|
||||
} else {
|
||||
segment.rto += kcp.rx_rto / 2
|
||||
}
|
||||
segment.resendts = current + segment.rto
|
||||
lost++
|
||||
lostSegs++
|
||||
} else if segment.fastack >= resent { // fast retransmit
|
||||
needsend = true
|
||||
segment.fastack = 0
|
||||
segment.rto = kcp.rx_rto
|
||||
segment.resendts = current + segment.rto
|
||||
change++
|
||||
fastRetransSegs++
|
||||
} else if segment.fastack > 0 && newSegsCount == 0 { // early retransmit
|
||||
needsend = true
|
||||
segment.fastack = 0
|
||||
segment.rto = kcp.rx_rto
|
||||
segment.resendts = current + segment.rto
|
||||
change++
|
||||
earlyRetransSegs++
|
||||
}
|
||||
|
||||
if needsend {
|
||||
segment.xmit++
|
||||
segment.ts = current
|
||||
segment.wnd = seg.wnd
|
||||
segment.una = seg.una
|
||||
|
||||
size := len(buffer) - len(ptr)
|
||||
need := IKCP_OVERHEAD + len(segment.data)
|
||||
|
||||
if size+need > int(kcp.mtu) {
|
||||
kcp.output(buffer, size)
|
||||
current = currentMs() // time update for a blocking call
|
||||
ptr = buffer
|
||||
}
|
||||
|
||||
ptr = segment.encode(ptr)
|
||||
copy(ptr, segment.data)
|
||||
ptr = ptr[len(segment.data):]
|
||||
|
||||
if segment.xmit >= kcp.dead_link {
|
||||
kcp.state = 0xFFFFFFFF
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// flash remain segments
|
||||
size := len(buffer) - len(ptr)
|
||||
if size > 0 {
|
||||
kcp.output(buffer, size)
|
||||
}
|
||||
|
||||
// counter updates
|
||||
sum := lostSegs
|
||||
if lostSegs > 0 {
|
||||
atomic.AddUint64(&DefaultSnmp.LostSegs, lostSegs)
|
||||
}
|
||||
if fastRetransSegs > 0 {
|
||||
atomic.AddUint64(&DefaultSnmp.FastRetransSegs, fastRetransSegs)
|
||||
sum += fastRetransSegs
|
||||
}
|
||||
if earlyRetransSegs > 0 {
|
||||
atomic.AddUint64(&DefaultSnmp.EarlyRetransSegs, earlyRetransSegs)
|
||||
sum += earlyRetransSegs
|
||||
}
|
||||
if sum > 0 {
|
||||
atomic.AddUint64(&DefaultSnmp.RetransSegs, sum)
|
||||
}
|
||||
|
||||
// update ssthresh
|
||||
// rate halving, https://tools.ietf.org/html/rfc6937
|
||||
if change > 0 {
|
||||
inflight := kcp.snd_nxt - kcp.snd_una
|
||||
kcp.ssthresh = inflight / 2
|
||||
if kcp.ssthresh < IKCP_THRESH_MIN {
|
||||
kcp.ssthresh = IKCP_THRESH_MIN
|
||||
}
|
||||
kcp.cwnd = kcp.ssthresh + resent
|
||||
kcp.incr = kcp.cwnd * kcp.mss
|
||||
}
|
||||
|
||||
// congestion control, https://tools.ietf.org/html/rfc5681
|
||||
if lost > 0 {
|
||||
kcp.ssthresh = cwnd / 2
|
||||
if kcp.ssthresh < IKCP_THRESH_MIN {
|
||||
kcp.ssthresh = IKCP_THRESH_MIN
|
||||
}
|
||||
kcp.cwnd = 1
|
||||
kcp.incr = kcp.mss
|
||||
}
|
||||
|
||||
if kcp.cwnd < 1 {
|
||||
kcp.cwnd = 1
|
||||
kcp.incr = kcp.mss
|
||||
}
|
||||
}
|
||||
|
||||
// Update updates state (call it repeatedly, every 10ms-100ms), or you can ask
|
||||
// ikcp_check when to call it again (without ikcp_input/_send calling).
|
||||
// 'current' - current timestamp in millisec.
|
||||
func (kcp *KCP) Update() {
|
||||
var slap int32
|
||||
|
||||
current := currentMs()
|
||||
if kcp.updated == 0 {
|
||||
kcp.updated = 1
|
||||
kcp.ts_flush = current
|
||||
}
|
||||
|
||||
slap = _itimediff(current, kcp.ts_flush)
|
||||
|
||||
if slap >= 10000 || slap < -10000 {
|
||||
kcp.ts_flush = current
|
||||
slap = 0
|
||||
}
|
||||
|
||||
if slap >= 0 {
|
||||
kcp.ts_flush += kcp.interval
|
||||
if _itimediff(current, kcp.ts_flush) >= 0 {
|
||||
kcp.ts_flush = current + kcp.interval
|
||||
}
|
||||
kcp.flush(false)
|
||||
}
|
||||
}
|
||||
|
||||
// Check determines when should you invoke ikcp_update:
|
||||
// returns when you should invoke ikcp_update in millisec, if there
|
||||
// is no ikcp_input/_send calling. you can call ikcp_update in that
|
||||
// time, instead of call update repeatly.
|
||||
// Important to reduce unnacessary ikcp_update invoking. use it to
|
||||
// schedule ikcp_update (eg. implementing an epoll-like mechanism,
|
||||
// or optimize ikcp_update when handling massive kcp connections)
|
||||
func (kcp *KCP) Check() uint32 {
|
||||
current := currentMs()
|
||||
ts_flush := kcp.ts_flush
|
||||
tm_flush := int32(0x7fffffff)
|
||||
tm_packet := int32(0x7fffffff)
|
||||
minimal := uint32(0)
|
||||
if kcp.updated == 0 {
|
||||
return current
|
||||
}
|
||||
|
||||
if _itimediff(current, ts_flush) >= 10000 ||
|
||||
_itimediff(current, ts_flush) < -10000 {
|
||||
ts_flush = current
|
||||
}
|
||||
|
||||
if _itimediff(current, ts_flush) >= 0 {
|
||||
return current
|
||||
}
|
||||
|
||||
tm_flush = _itimediff(ts_flush, current)
|
||||
|
||||
for k := range kcp.snd_buf {
|
||||
seg := &kcp.snd_buf[k]
|
||||
diff := _itimediff(seg.resendts, current)
|
||||
if diff <= 0 {
|
||||
return current
|
||||
}
|
||||
if diff < tm_packet {
|
||||
tm_packet = diff
|
||||
}
|
||||
}
|
||||
|
||||
minimal = uint32(tm_packet)
|
||||
if tm_packet >= tm_flush {
|
||||
minimal = uint32(tm_flush)
|
||||
}
|
||||
if minimal >= kcp.interval {
|
||||
minimal = kcp.interval
|
||||
}
|
||||
|
||||
return current + minimal
|
||||
}
|
||||
|
||||
// SetMtu changes MTU size, default is 1400
|
||||
func (kcp *KCP) SetMtu(mtu int) int {
|
||||
if mtu < 50 || mtu < IKCP_OVERHEAD {
|
||||
return -1
|
||||
}
|
||||
buffer := make([]byte, (mtu+IKCP_OVERHEAD)*3)
|
||||
if buffer == nil {
|
||||
return -2
|
||||
}
|
||||
kcp.mtu = uint32(mtu)
|
||||
kcp.mss = kcp.mtu - IKCP_OVERHEAD
|
||||
kcp.buffer = buffer
|
||||
return 0
|
||||
}
|
||||
|
||||
// NoDelay options
|
||||
// fastest: ikcp_nodelay(kcp, 1, 20, 2, 1)
|
||||
// nodelay: 0:disable(default), 1:enable
|
||||
// interval: internal update timer interval in millisec, default is 100ms
|
||||
// resend: 0:disable fast resend(default), 1:enable fast resend
|
||||
// nc: 0:normal congestion control(default), 1:disable congestion control
|
||||
func (kcp *KCP) NoDelay(nodelay, interval, resend, nc int) int {
|
||||
if nodelay >= 0 {
|
||||
kcp.nodelay = uint32(nodelay)
|
||||
if nodelay != 0 {
|
||||
kcp.rx_minrto = IKCP_RTO_NDL
|
||||
} else {
|
||||
kcp.rx_minrto = IKCP_RTO_MIN
|
||||
}
|
||||
}
|
||||
if interval >= 0 {
|
||||
if interval > 5000 {
|
||||
interval = 5000
|
||||
} else if interval < 10 {
|
||||
interval = 10
|
||||
}
|
||||
kcp.interval = uint32(interval)
|
||||
}
|
||||
if resend >= 0 {
|
||||
kcp.fastresend = int32(resend)
|
||||
}
|
||||
if nc >= 0 {
|
||||
kcp.nocwnd = int32(nc)
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
// WndSize sets maximum window size: sndwnd=32, rcvwnd=32 by default
|
||||
func (kcp *KCP) WndSize(sndwnd, rcvwnd int) int {
|
||||
if sndwnd > 0 {
|
||||
kcp.snd_wnd = uint32(sndwnd)
|
||||
}
|
||||
if rcvwnd > 0 {
|
||||
kcp.rcv_wnd = uint32(rcvwnd)
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
// WaitSnd gets how many packet is waiting to be sent
|
||||
func (kcp *KCP) WaitSnd() int {
|
||||
return len(kcp.snd_buf) + len(kcp.snd_queue)
|
||||
}
|
||||
|
||||
// remove front n elements from queue
|
||||
func (kcp *KCP) remove_front(q []segment, n int) []segment {
|
||||
newn := copy(q, q[n:])
|
||||
for i := newn; i < len(q); i++ {
|
||||
q[i] = segment{} // manual set nil for GC
|
||||
}
|
||||
return q[:newn]
|
||||
}
|
975
vendor/github.com/fatedier/kcp-go/sess.go
generated
vendored
Normal file
975
vendor/github.com/fatedier/kcp-go/sess.go
generated
vendored
Normal file
@ -0,0 +1,975 @@
|
||||
package kcp
|
||||
|
||||
import (
|
||||
"crypto/rand"
|
||||
"encoding/binary"
|
||||
"hash/crc32"
|
||||
"io"
|
||||
"net"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
"time"
|
||||
|
||||
"github.com/pkg/errors"
|
||||
"golang.org/x/net/ipv4"
|
||||
)
|
||||
|
||||
// errTimeout is the error returned when a deadline expires. Its method set
// (Error, Timeout, Temporary) matches net.Error. The embedded error is never
// consulted because Error is overridden below.
type errTimeout struct {
	error
}

// Timeout always reports true.
func (errTimeout) Timeout() bool {
	return true
}

// Temporary always reports true.
func (errTimeout) Temporary() bool {
	return true
}

// Error returns the fixed message "i/o timeout".
func (errTimeout) Error() string {
	return "i/o timeout"
}
|
||||
|
||||
const (
	// nonceSize is the length of the random prefix written at the front of
	// every encrypted packet.
	nonceSize = 16

	// crcSize is the length of the CRC32 checksum stored after the nonce.
	crcSize = 4

	// cryptHeaderSize is the total header added when encryption is enabled.
	cryptHeaderSize = nonceSize + crcSize

	// mtuLimit is the largest packet size handled; it also sizes the pooled
	// packet buffers.
	mtuLimit = 1500

	// rxFECMulti scales the FEC receive window: the decoder is created with
	// rxFECMulti*(dataShards+parityShards) as its first argument.
	rxFECMulti = 3

	// acceptBacklog is the capacity of a Listener's accept queue.
	acceptBacklog = 128

	// qlen is the capacity of the receiver-to-dispatcher packet queue.
	qlen = 128
)
|
||||
|
||||
// Messages used when building errors returned by sessions and listeners.
const (
	// errBrokenPipe is reported for operations on a closed session or listener.
	errBrokenPipe = "broken pipe"
	// errInvalidOperation is reported when the underlying connection does
	// not support the requested socket option.
	errInvalidOperation = "invalid operation"
)
|
||||
|
||||
var (
|
||||
// global packet buffer
|
||||
// shared among sending/receiving/FEC
|
||||
xmitBuf sync.Pool
|
||||
)
|
||||
|
||||
func init() {
|
||||
xmitBuf.New = func() interface{} {
|
||||
return make([]byte, mtuLimit)
|
||||
}
|
||||
}
|
||||
|
||||
type (
|
||||
// UDPSession defines a KCP session implemented by UDP
|
||||
UDPSession struct {
|
||||
updaterIdx int // record slice index in updater
|
||||
conn net.PacketConn // the underlying packet connection
|
||||
kcp *KCP // KCP ARQ protocol
|
||||
l *Listener // point to the Listener if it's accepted by Listener
|
||||
block BlockCrypt // block encryption
|
||||
|
||||
// kcp receiving is based on packets
|
||||
// recvbuf turns packets into stream
|
||||
recvbuf []byte
|
||||
bufptr []byte
|
||||
// extended output buffer(with header)
|
||||
ext []byte
|
||||
|
||||
// FEC
|
||||
fecDecoder *fecDecoder
|
||||
fecEncoder *fecEncoder
|
||||
|
||||
// settings
|
||||
remote net.Addr // remote peer address
|
||||
rd time.Time // read deadline
|
||||
wd time.Time // write deadline
|
||||
headerSize int // the overall header size added before KCP frame
|
||||
ackNoDelay bool // send ack immediately for each incoming packet
|
||||
writeDelay bool // delay kcp.flush() for Write() for bulk transfer
|
||||
dup int // duplicate udp packets
|
||||
|
||||
// notifications
|
||||
die chan struct{} // notify session has Closed
|
||||
chReadEvent chan struct{} // notify Read() can be called without blocking
|
||||
chWriteEvent chan struct{} // notify Write() can be called without blocking
|
||||
chErrorEvent chan error // notify Read() have an error
|
||||
|
||||
isClosed bool // flag the session has Closed
|
||||
mu sync.Mutex
|
||||
}
|
||||
|
||||
setReadBuffer interface {
|
||||
SetReadBuffer(bytes int) error
|
||||
}
|
||||
|
||||
setWriteBuffer interface {
|
||||
SetWriteBuffer(bytes int) error
|
||||
}
|
||||
)
|
||||
|
||||
// newUDPSession create a new udp session for client or server
|
||||
func newUDPSession(conv uint32, dataShards, parityShards int, l *Listener, conn net.PacketConn, remote net.Addr, block BlockCrypt) *UDPSession {
|
||||
sess := new(UDPSession)
|
||||
sess.die = make(chan struct{})
|
||||
sess.chReadEvent = make(chan struct{}, 1)
|
||||
sess.chWriteEvent = make(chan struct{}, 1)
|
||||
sess.chErrorEvent = make(chan error, 1)
|
||||
sess.remote = remote
|
||||
sess.conn = conn
|
||||
sess.l = l
|
||||
sess.block = block
|
||||
sess.recvbuf = make([]byte, mtuLimit)
|
||||
|
||||
// FEC initialization
|
||||
sess.fecDecoder = newFECDecoder(rxFECMulti*(dataShards+parityShards), dataShards, parityShards)
|
||||
if sess.block != nil {
|
||||
sess.fecEncoder = newFECEncoder(dataShards, parityShards, cryptHeaderSize)
|
||||
} else {
|
||||
sess.fecEncoder = newFECEncoder(dataShards, parityShards, 0)
|
||||
}
|
||||
|
||||
// calculate header size
|
||||
if sess.block != nil {
|
||||
sess.headerSize += cryptHeaderSize
|
||||
}
|
||||
if sess.fecEncoder != nil {
|
||||
sess.headerSize += fecHeaderSizePlus2
|
||||
}
|
||||
|
||||
// only allocate extended packet buffer
|
||||
// when the extra header is required
|
||||
if sess.headerSize > 0 {
|
||||
sess.ext = make([]byte, mtuLimit)
|
||||
}
|
||||
|
||||
sess.kcp = NewKCP(conv, func(buf []byte, size int) {
|
||||
if size >= IKCP_OVERHEAD {
|
||||
sess.output(buf[:size])
|
||||
}
|
||||
})
|
||||
sess.kcp.SetMtu(IKCP_MTU_DEF - sess.headerSize)
|
||||
|
||||
// add current session to the global updater,
|
||||
// which periodically calls sess.update()
|
||||
updater.addSession(sess)
|
||||
|
||||
if sess.l == nil { // it's a client connection
|
||||
go sess.readLoop()
|
||||
atomic.AddUint64(&DefaultSnmp.ActiveOpens, 1)
|
||||
} else {
|
||||
atomic.AddUint64(&DefaultSnmp.PassiveOpens, 1)
|
||||
}
|
||||
currestab := atomic.AddUint64(&DefaultSnmp.CurrEstab, 1)
|
||||
maxconn := atomic.LoadUint64(&DefaultSnmp.MaxConn)
|
||||
if currestab > maxconn {
|
||||
atomic.CompareAndSwapUint64(&DefaultSnmp.MaxConn, maxconn, currestab)
|
||||
}
|
||||
|
||||
return sess
|
||||
}
|
||||
|
||||
// Read implements net.Conn
|
||||
func (s *UDPSession) Read(b []byte) (n int, err error) {
|
||||
for {
|
||||
s.mu.Lock()
|
||||
if len(s.bufptr) > 0 { // copy from buffer into b
|
||||
n = copy(b, s.bufptr)
|
||||
s.bufptr = s.bufptr[n:]
|
||||
s.mu.Unlock()
|
||||
return n, nil
|
||||
}
|
||||
|
||||
if s.isClosed {
|
||||
s.mu.Unlock()
|
||||
return 0, errors.New(errBrokenPipe)
|
||||
}
|
||||
|
||||
if size := s.kcp.PeekSize(); size > 0 { // peek data size from kcp
|
||||
atomic.AddUint64(&DefaultSnmp.BytesReceived, uint64(size))
|
||||
if len(b) >= size { // direct write to b
|
||||
s.kcp.Recv(b)
|
||||
s.mu.Unlock()
|
||||
return size, nil
|
||||
}
|
||||
|
||||
// resize kcp receive buffer
|
||||
// to make sure recvbuf has enough capacity
|
||||
if cap(s.recvbuf) < size {
|
||||
s.recvbuf = make([]byte, size)
|
||||
}
|
||||
|
||||
// resize recvbuf slice length
|
||||
s.recvbuf = s.recvbuf[:size]
|
||||
s.kcp.Recv(s.recvbuf)
|
||||
n = copy(b, s.recvbuf) // copy to b
|
||||
s.bufptr = s.recvbuf[n:] // update pointer
|
||||
s.mu.Unlock()
|
||||
return n, nil
|
||||
}
|
||||
|
||||
// read deadline
|
||||
var timeout *time.Timer
|
||||
var c <-chan time.Time
|
||||
if !s.rd.IsZero() {
|
||||
if time.Now().After(s.rd) {
|
||||
s.mu.Unlock()
|
||||
return 0, errTimeout{}
|
||||
}
|
||||
|
||||
delay := s.rd.Sub(time.Now())
|
||||
timeout = time.NewTimer(delay)
|
||||
c = timeout.C
|
||||
}
|
||||
s.mu.Unlock()
|
||||
|
||||
// wait for read event or timeout
|
||||
select {
|
||||
case <-s.chReadEvent:
|
||||
case <-c:
|
||||
case <-s.die:
|
||||
case err = <-s.chErrorEvent:
|
||||
if timeout != nil {
|
||||
timeout.Stop()
|
||||
}
|
||||
return n, err
|
||||
}
|
||||
|
||||
if timeout != nil {
|
||||
timeout.Stop()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Write implements net.Conn
|
||||
func (s *UDPSession) Write(b []byte) (n int, err error) {
|
||||
for {
|
||||
s.mu.Lock()
|
||||
if s.isClosed {
|
||||
s.mu.Unlock()
|
||||
return 0, errors.New(errBrokenPipe)
|
||||
}
|
||||
|
||||
// api flow control
|
||||
if s.kcp.WaitSnd() < int(s.kcp.snd_wnd) {
|
||||
n = len(b)
|
||||
for {
|
||||
if len(b) <= int(s.kcp.mss) {
|
||||
s.kcp.Send(b)
|
||||
break
|
||||
} else {
|
||||
s.kcp.Send(b[:s.kcp.mss])
|
||||
b = b[s.kcp.mss:]
|
||||
}
|
||||
}
|
||||
|
||||
if !s.writeDelay {
|
||||
s.kcp.flush(false)
|
||||
}
|
||||
s.mu.Unlock()
|
||||
atomic.AddUint64(&DefaultSnmp.BytesSent, uint64(n))
|
||||
return n, nil
|
||||
}
|
||||
|
||||
// write deadline
|
||||
var timeout *time.Timer
|
||||
var c <-chan time.Time
|
||||
if !s.wd.IsZero() {
|
||||
if time.Now().After(s.wd) {
|
||||
s.mu.Unlock()
|
||||
return 0, errTimeout{}
|
||||
}
|
||||
delay := s.wd.Sub(time.Now())
|
||||
timeout = time.NewTimer(delay)
|
||||
c = timeout.C
|
||||
}
|
||||
s.mu.Unlock()
|
||||
|
||||
// wait for write event or timeout
|
||||
select {
|
||||
case <-s.chWriteEvent:
|
||||
case <-c:
|
||||
case <-s.die:
|
||||
}
|
||||
|
||||
if timeout != nil {
|
||||
timeout.Stop()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Close closes the connection.
|
||||
func (s *UDPSession) Close() error {
|
||||
// remove this session from updater & listener(if necessary)
|
||||
updater.removeSession(s)
|
||||
if s.l != nil { // notify listener
|
||||
s.l.closeSession(sessionKey{
|
||||
addr: s.remote.String(),
|
||||
convID: s.kcp.conv,
|
||||
})
|
||||
}
|
||||
|
||||
s.mu.Lock()
|
||||
defer s.mu.Unlock()
|
||||
if s.isClosed {
|
||||
return errors.New(errBrokenPipe)
|
||||
}
|
||||
close(s.die)
|
||||
s.isClosed = true
|
||||
atomic.AddUint64(&DefaultSnmp.CurrEstab, ^uint64(0))
|
||||
if s.l == nil { // client socket close
|
||||
return s.conn.Close()
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// LocalAddr returns the local network address. The Addr returned is shared by all invocations of LocalAddr, so do not modify it.
|
||||
func (s *UDPSession) LocalAddr() net.Addr { return s.conn.LocalAddr() }
|
||||
|
||||
// RemoteAddr returns the remote network address. The Addr returned is shared by all invocations of RemoteAddr, so do not modify it.
|
||||
func (s *UDPSession) RemoteAddr() net.Addr { return s.remote }
|
||||
|
||||
// SetDeadline sets the deadline associated with the listener. A zero time value disables the deadline.
|
||||
func (s *UDPSession) SetDeadline(t time.Time) error {
|
||||
s.mu.Lock()
|
||||
defer s.mu.Unlock()
|
||||
s.rd = t
|
||||
s.wd = t
|
||||
return nil
|
||||
}
|
||||
|
||||
// SetReadDeadline implements the Conn SetReadDeadline method.
|
||||
func (s *UDPSession) SetReadDeadline(t time.Time) error {
|
||||
s.mu.Lock()
|
||||
defer s.mu.Unlock()
|
||||
s.rd = t
|
||||
return nil
|
||||
}
|
||||
|
||||
// SetWriteDeadline implements the Conn SetWriteDeadline method.
|
||||
func (s *UDPSession) SetWriteDeadline(t time.Time) error {
|
||||
s.mu.Lock()
|
||||
defer s.mu.Unlock()
|
||||
s.wd = t
|
||||
return nil
|
||||
}
|
||||
|
||||
// SetWriteDelay delays write for bulk transfer until the next update interval
|
||||
func (s *UDPSession) SetWriteDelay(delay bool) {
|
||||
s.mu.Lock()
|
||||
defer s.mu.Unlock()
|
||||
s.writeDelay = delay
|
||||
}
|
||||
|
||||
// SetWindowSize set maximum window size
|
||||
func (s *UDPSession) SetWindowSize(sndwnd, rcvwnd int) {
|
||||
s.mu.Lock()
|
||||
defer s.mu.Unlock()
|
||||
s.kcp.WndSize(sndwnd, rcvwnd)
|
||||
}
|
||||
|
||||
// SetMtu sets the maximum transmission unit(not including UDP header)
|
||||
func (s *UDPSession) SetMtu(mtu int) bool {
|
||||
if mtu > mtuLimit {
|
||||
return false
|
||||
}
|
||||
|
||||
s.mu.Lock()
|
||||
defer s.mu.Unlock()
|
||||
s.kcp.SetMtu(mtu - s.headerSize)
|
||||
return true
|
||||
}
|
||||
|
||||
// SetStreamMode toggles the stream mode on/off
|
||||
func (s *UDPSession) SetStreamMode(enable bool) {
|
||||
s.mu.Lock()
|
||||
defer s.mu.Unlock()
|
||||
if enable {
|
||||
s.kcp.stream = 1
|
||||
} else {
|
||||
s.kcp.stream = 0
|
||||
}
|
||||
}
|
||||
|
||||
// SetACKNoDelay changes ack flush option, set true to flush ack immediately,
|
||||
func (s *UDPSession) SetACKNoDelay(nodelay bool) {
|
||||
s.mu.Lock()
|
||||
defer s.mu.Unlock()
|
||||
s.ackNoDelay = nodelay
|
||||
}
|
||||
|
||||
// SetDUP duplicates udp packets for kcp output, for testing purpose only
|
||||
func (s *UDPSession) SetDUP(dup int) {
|
||||
s.mu.Lock()
|
||||
defer s.mu.Unlock()
|
||||
s.dup = dup
|
||||
}
|
||||
|
||||
// SetNoDelay calls nodelay() of kcp
|
||||
// https://github.com/skywind3000/kcp/blob/master/README.en.md#protocol-configuration
|
||||
func (s *UDPSession) SetNoDelay(nodelay, interval, resend, nc int) {
|
||||
s.mu.Lock()
|
||||
defer s.mu.Unlock()
|
||||
s.kcp.NoDelay(nodelay, interval, resend, nc)
|
||||
}
|
||||
|
||||
// SetDSCP sets the 6bit DSCP field of IP header, no effect if it's accepted from Listener
|
||||
func (s *UDPSession) SetDSCP(dscp int) error {
|
||||
s.mu.Lock()
|
||||
defer s.mu.Unlock()
|
||||
if s.l == nil {
|
||||
if nc, ok := s.conn.(*connectedUDPConn); ok {
|
||||
return ipv4.NewConn(nc.UDPConn).SetTOS(dscp << 2)
|
||||
} else if nc, ok := s.conn.(net.Conn); ok {
|
||||
return ipv4.NewConn(nc).SetTOS(dscp << 2)
|
||||
}
|
||||
}
|
||||
return errors.New(errInvalidOperation)
|
||||
}
|
||||
|
||||
// SetReadBuffer sets the socket read buffer, no effect if it's accepted from Listener
|
||||
func (s *UDPSession) SetReadBuffer(bytes int) error {
|
||||
s.mu.Lock()
|
||||
defer s.mu.Unlock()
|
||||
if s.l == nil {
|
||||
if nc, ok := s.conn.(setReadBuffer); ok {
|
||||
return nc.SetReadBuffer(bytes)
|
||||
}
|
||||
}
|
||||
return errors.New(errInvalidOperation)
|
||||
}
|
||||
|
||||
// SetWriteBuffer sets the socket write buffer, no effect if it's accepted from Listener
|
||||
func (s *UDPSession) SetWriteBuffer(bytes int) error {
|
||||
s.mu.Lock()
|
||||
defer s.mu.Unlock()
|
||||
if s.l == nil {
|
||||
if nc, ok := s.conn.(setWriteBuffer); ok {
|
||||
return nc.SetWriteBuffer(bytes)
|
||||
}
|
||||
}
|
||||
return errors.New(errInvalidOperation)
|
||||
}
|
||||
|
||||
// output pipeline entry
|
||||
// steps for output data processing:
|
||||
// 0. Header extends
|
||||
// 1. FEC
|
||||
// 2. CRC32
|
||||
// 3. Encryption
|
||||
// 4. WriteTo kernel
|
||||
func (s *UDPSession) output(buf []byte) {
|
||||
var ecc [][]byte
|
||||
|
||||
// 0. extend buf's header space(if necessary)
|
||||
ext := buf
|
||||
if s.headerSize > 0 {
|
||||
ext = s.ext[:s.headerSize+len(buf)]
|
||||
copy(ext[s.headerSize:], buf)
|
||||
}
|
||||
|
||||
// 1. FEC encoding
|
||||
if s.fecEncoder != nil {
|
||||
ecc = s.fecEncoder.encode(ext)
|
||||
}
|
||||
|
||||
// 2&3. crc32 & encryption
|
||||
if s.block != nil {
|
||||
io.ReadFull(rand.Reader, ext[:nonceSize])
|
||||
checksum := crc32.ChecksumIEEE(ext[cryptHeaderSize:])
|
||||
binary.LittleEndian.PutUint32(ext[nonceSize:], checksum)
|
||||
s.block.Encrypt(ext, ext)
|
||||
|
||||
for k := range ecc {
|
||||
io.ReadFull(rand.Reader, ecc[k][:nonceSize])
|
||||
checksum := crc32.ChecksumIEEE(ecc[k][cryptHeaderSize:])
|
||||
binary.LittleEndian.PutUint32(ecc[k][nonceSize:], checksum)
|
||||
s.block.Encrypt(ecc[k], ecc[k])
|
||||
}
|
||||
}
|
||||
|
||||
// 4. WriteTo kernel
|
||||
nbytes := 0
|
||||
npkts := 0
|
||||
for i := 0; i < s.dup+1; i++ {
|
||||
if n, err := s.conn.WriteTo(ext, s.remote); err == nil {
|
||||
nbytes += n
|
||||
npkts++
|
||||
}
|
||||
}
|
||||
|
||||
for k := range ecc {
|
||||
if n, err := s.conn.WriteTo(ecc[k], s.remote); err == nil {
|
||||
nbytes += n
|
||||
npkts++
|
||||
}
|
||||
}
|
||||
atomic.AddUint64(&DefaultSnmp.OutPkts, uint64(npkts))
|
||||
atomic.AddUint64(&DefaultSnmp.OutBytes, uint64(nbytes))
|
||||
}
|
||||
|
||||
// kcp update, returns interval for next calling
|
||||
func (s *UDPSession) update() (interval time.Duration) {
|
||||
s.mu.Lock()
|
||||
s.kcp.flush(false)
|
||||
if s.kcp.WaitSnd() < int(s.kcp.snd_wnd) {
|
||||
s.notifyWriteEvent()
|
||||
}
|
||||
interval = time.Duration(s.kcp.interval) * time.Millisecond
|
||||
s.mu.Unlock()
|
||||
return
|
||||
}
|
||||
|
||||
// GetConv gets conversation id of a session
|
||||
func (s *UDPSession) GetConv() uint32 { return s.kcp.conv }
|
||||
|
||||
func (s *UDPSession) notifyReadEvent() {
|
||||
select {
|
||||
case s.chReadEvent <- struct{}{}:
|
||||
default:
|
||||
}
|
||||
}
|
||||
|
||||
func (s *UDPSession) notifyWriteEvent() {
|
||||
select {
|
||||
case s.chWriteEvent <- struct{}{}:
|
||||
default:
|
||||
}
|
||||
}
|
||||
|
||||
func (s *UDPSession) kcpInput(data []byte) {
|
||||
var kcpInErrors, fecErrs, fecRecovered, fecParityShards uint64
|
||||
|
||||
if s.fecDecoder != nil {
|
||||
f := s.fecDecoder.decodeBytes(data)
|
||||
s.mu.Lock()
|
||||
if f.flag == typeData {
|
||||
if ret := s.kcp.Input(data[fecHeaderSizePlus2:], true, s.ackNoDelay); ret != 0 {
|
||||
kcpInErrors++
|
||||
}
|
||||
}
|
||||
|
||||
if f.flag == typeData || f.flag == typeFEC {
|
||||
if f.flag == typeFEC {
|
||||
fecParityShards++
|
||||
}
|
||||
|
||||
recovers := s.fecDecoder.decode(f)
|
||||
for _, r := range recovers {
|
||||
if len(r) >= 2 { // must be larger than 2bytes
|
||||
sz := binary.LittleEndian.Uint16(r)
|
||||
if int(sz) <= len(r) && sz >= 2 {
|
||||
if ret := s.kcp.Input(r[2:sz], false, s.ackNoDelay); ret == 0 {
|
||||
fecRecovered++
|
||||
} else {
|
||||
kcpInErrors++
|
||||
}
|
||||
} else {
|
||||
fecErrs++
|
||||
}
|
||||
} else {
|
||||
fecErrs++
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// notify reader
|
||||
if n := s.kcp.PeekSize(); n > 0 {
|
||||
s.notifyReadEvent()
|
||||
}
|
||||
s.mu.Unlock()
|
||||
} else {
|
||||
s.mu.Lock()
|
||||
if ret := s.kcp.Input(data, true, s.ackNoDelay); ret != 0 {
|
||||
kcpInErrors++
|
||||
}
|
||||
// notify reader
|
||||
if n := s.kcp.PeekSize(); n > 0 {
|
||||
s.notifyReadEvent()
|
||||
}
|
||||
s.mu.Unlock()
|
||||
}
|
||||
|
||||
atomic.AddUint64(&DefaultSnmp.InPkts, 1)
|
||||
atomic.AddUint64(&DefaultSnmp.InBytes, uint64(len(data)))
|
||||
if fecParityShards > 0 {
|
||||
atomic.AddUint64(&DefaultSnmp.FECParityShards, fecParityShards)
|
||||
}
|
||||
if kcpInErrors > 0 {
|
||||
atomic.AddUint64(&DefaultSnmp.KCPInErrors, kcpInErrors)
|
||||
}
|
||||
if fecErrs > 0 {
|
||||
atomic.AddUint64(&DefaultSnmp.FECErrs, fecErrs)
|
||||
}
|
||||
if fecRecovered > 0 {
|
||||
atomic.AddUint64(&DefaultSnmp.FECRecovered, fecRecovered)
|
||||
}
|
||||
}
|
||||
|
||||
func (s *UDPSession) receiver(ch chan<- []byte) {
|
||||
for {
|
||||
data := xmitBuf.Get().([]byte)[:mtuLimit]
|
||||
if n, _, err := s.conn.ReadFrom(data); err == nil && n >= s.headerSize+IKCP_OVERHEAD {
|
||||
select {
|
||||
case ch <- data[:n]:
|
||||
case <-s.die:
|
||||
return
|
||||
}
|
||||
} else if err != nil {
|
||||
s.chErrorEvent <- err
|
||||
return
|
||||
} else {
|
||||
atomic.AddUint64(&DefaultSnmp.InErrs, 1)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// read loop for client session
|
||||
func (s *UDPSession) readLoop() {
|
||||
chPacket := make(chan []byte, qlen)
|
||||
go s.receiver(chPacket)
|
||||
|
||||
for {
|
||||
select {
|
||||
case data := <-chPacket:
|
||||
raw := data
|
||||
dataValid := false
|
||||
if s.block != nil {
|
||||
s.block.Decrypt(data, data)
|
||||
data = data[nonceSize:]
|
||||
checksum := crc32.ChecksumIEEE(data[crcSize:])
|
||||
if checksum == binary.LittleEndian.Uint32(data) {
|
||||
data = data[crcSize:]
|
||||
dataValid = true
|
||||
} else {
|
||||
atomic.AddUint64(&DefaultSnmp.InCsumErrors, 1)
|
||||
}
|
||||
} else if s.block == nil {
|
||||
dataValid = true
|
||||
}
|
||||
|
||||
if dataValid {
|
||||
s.kcpInput(data)
|
||||
}
|
||||
xmitBuf.Put(raw)
|
||||
case <-s.die:
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
type (
|
||||
sessionKey struct {
|
||||
addr string
|
||||
convID uint32
|
||||
}
|
||||
|
||||
// Listener defines a server listening for connections
|
||||
Listener struct {
|
||||
block BlockCrypt // block encryption
|
||||
dataShards int // FEC data shard
|
||||
parityShards int // FEC parity shard
|
||||
fecDecoder *fecDecoder // FEC mock initialization
|
||||
conn net.PacketConn // the underlying packet connection
|
||||
|
||||
sessions map[sessionKey]*UDPSession // all sessions accepted by this Listener
|
||||
chAccepts chan *UDPSession // Listen() backlog
|
||||
chSessionClosed chan sessionKey // session close queue
|
||||
headerSize int // the overall header size added before KCP frame
|
||||
die chan struct{} // notify the listener has closed
|
||||
rd atomic.Value // read deadline for Accept()
|
||||
wd atomic.Value
|
||||
}
|
||||
|
||||
// incoming packet
|
||||
inPacket struct {
|
||||
from net.Addr
|
||||
data []byte
|
||||
}
|
||||
)
|
||||
|
||||
// monitor incoming data for all connections of server
|
||||
func (l *Listener) monitor() {
|
||||
// cache last session
|
||||
var lastKey sessionKey
|
||||
var lastSession *UDPSession
|
||||
|
||||
chPacket := make(chan inPacket, qlen)
|
||||
go l.receiver(chPacket)
|
||||
for {
|
||||
select {
|
||||
case p := <-chPacket:
|
||||
raw := p.data
|
||||
data := p.data
|
||||
from := p.from
|
||||
dataValid := false
|
||||
if l.block != nil {
|
||||
l.block.Decrypt(data, data)
|
||||
data = data[nonceSize:]
|
||||
checksum := crc32.ChecksumIEEE(data[crcSize:])
|
||||
if checksum == binary.LittleEndian.Uint32(data) {
|
||||
data = data[crcSize:]
|
||||
dataValid = true
|
||||
} else {
|
||||
atomic.AddUint64(&DefaultSnmp.InCsumErrors, 1)
|
||||
}
|
||||
} else if l.block == nil {
|
||||
dataValid = true
|
||||
}
|
||||
|
||||
if dataValid {
|
||||
var conv uint32
|
||||
convValid := false
|
||||
if l.fecDecoder != nil {
|
||||
isfec := binary.LittleEndian.Uint16(data[4:])
|
||||
if isfec == typeData {
|
||||
conv = binary.LittleEndian.Uint32(data[fecHeaderSizePlus2:])
|
||||
convValid = true
|
||||
}
|
||||
} else {
|
||||
conv = binary.LittleEndian.Uint32(data)
|
||||
convValid = true
|
||||
}
|
||||
|
||||
if convValid {
|
||||
key := sessionKey{
|
||||
addr: from.String(),
|
||||
convID: conv,
|
||||
}
|
||||
var s *UDPSession
|
||||
var ok bool
|
||||
|
||||
// packets received from an address always come in batch.
|
||||
// cache the session for next packet, without querying map.
|
||||
if key == lastKey {
|
||||
s, ok = lastSession, true
|
||||
} else if s, ok = l.sessions[key]; ok {
|
||||
lastSession = s
|
||||
lastKey = key
|
||||
}
|
||||
|
||||
if !ok { // new session
|
||||
if len(l.chAccepts) < cap(l.chAccepts) && len(l.sessions) < 4096 { // do not let new session overwhelm accept queue and connection count
|
||||
s := newUDPSession(conv, l.dataShards, l.parityShards, l, l.conn, from, l.block)
|
||||
s.kcpInput(data)
|
||||
l.sessions[key] = s
|
||||
l.chAccepts <- s
|
||||
}
|
||||
} else {
|
||||
s.kcpInput(data)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
xmitBuf.Put(raw)
|
||||
case key := <-l.chSessionClosed:
|
||||
if key == lastKey {
|
||||
lastKey = sessionKey{}
|
||||
}
|
||||
delete(l.sessions, key)
|
||||
case <-l.die:
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (l *Listener) receiver(ch chan<- inPacket) {
|
||||
for {
|
||||
data := xmitBuf.Get().([]byte)[:mtuLimit]
|
||||
if n, from, err := l.conn.ReadFrom(data); err == nil && n >= l.headerSize+IKCP_OVERHEAD {
|
||||
select {
|
||||
case ch <- inPacket{from, data[:n]}:
|
||||
case <-l.die:
|
||||
return
|
||||
}
|
||||
} else if err != nil {
|
||||
return
|
||||
} else {
|
||||
atomic.AddUint64(&DefaultSnmp.InErrs, 1)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// SetReadBuffer sets the socket read buffer for the Listener
|
||||
func (l *Listener) SetReadBuffer(bytes int) error {
|
||||
if nc, ok := l.conn.(setReadBuffer); ok {
|
||||
return nc.SetReadBuffer(bytes)
|
||||
}
|
||||
return errors.New(errInvalidOperation)
|
||||
}
|
||||
|
||||
// SetWriteBuffer sets the socket write buffer for the Listener
|
||||
func (l *Listener) SetWriteBuffer(bytes int) error {
|
||||
if nc, ok := l.conn.(setWriteBuffer); ok {
|
||||
return nc.SetWriteBuffer(bytes)
|
||||
}
|
||||
return errors.New(errInvalidOperation)
|
||||
}
|
||||
|
||||
// SetDSCP sets the 6bit DSCP field of IP header
|
||||
func (l *Listener) SetDSCP(dscp int) error {
|
||||
if nc, ok := l.conn.(net.Conn); ok {
|
||||
return ipv4.NewConn(nc).SetTOS(dscp << 2)
|
||||
}
|
||||
return errors.New(errInvalidOperation)
|
||||
}
|
||||
|
||||
// Accept implements the Accept method in the Listener interface; it waits for the next call and returns a generic Conn.
|
||||
func (l *Listener) Accept() (net.Conn, error) {
|
||||
return l.AcceptKCP()
|
||||
}
|
||||
|
||||
// AcceptKCP accepts a KCP connection
|
||||
func (l *Listener) AcceptKCP() (*UDPSession, error) {
|
||||
var timeout <-chan time.Time
|
||||
if tdeadline, ok := l.rd.Load().(time.Time); ok && !tdeadline.IsZero() {
|
||||
timeout = time.After(tdeadline.Sub(time.Now()))
|
||||
}
|
||||
|
||||
select {
|
||||
case <-timeout:
|
||||
return nil, &errTimeout{}
|
||||
case c := <-l.chAccepts:
|
||||
return c, nil
|
||||
case <-l.die:
|
||||
return nil, errors.New(errBrokenPipe)
|
||||
}
|
||||
}
|
||||
|
||||
// SetDeadline sets the deadline associated with the listener. A zero time value disables the deadline.
|
||||
func (l *Listener) SetDeadline(t time.Time) error {
|
||||
l.SetReadDeadline(t)
|
||||
l.SetWriteDeadline(t)
|
||||
return nil
|
||||
}
|
||||
|
||||
// SetReadDeadline implements the Conn SetReadDeadline method.
|
||||
func (l *Listener) SetReadDeadline(t time.Time) error {
|
||||
l.rd.Store(t)
|
||||
return nil
|
||||
}
|
||||
|
||||
// SetWriteDeadline implements the Conn SetWriteDeadline method.
|
||||
func (l *Listener) SetWriteDeadline(t time.Time) error {
|
||||
l.wd.Store(t)
|
||||
return nil
|
||||
}
|
||||
|
||||
// Close stops listening on the UDP address. Already Accepted connections are not closed.
|
||||
func (l *Listener) Close() error {
|
||||
close(l.die)
|
||||
return l.conn.Close()
|
||||
}
|
||||
|
||||
// closeSession notify the listener that a session has closed
|
||||
func (l *Listener) closeSession(key sessionKey) bool {
|
||||
select {
|
||||
case l.chSessionClosed <- key:
|
||||
return true
|
||||
case <-l.die:
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
// Addr returns the listener's network address, The Addr returned is shared by all invocations of Addr, so do not modify it.
|
||||
func (l *Listener) Addr() net.Addr { return l.conn.LocalAddr() }
|
||||
|
||||
// Listen listens for incoming KCP packets addressed to the local address laddr on the network "udp",
|
||||
func Listen(laddr string) (net.Listener, error) { return ListenWithOptions(laddr, nil, 0, 0) }
|
||||
|
||||
// ListenWithOptions listens for incoming KCP packets addressed to the local address laddr on the network "udp" with packet encryption,
|
||||
// dataShards, parityShards defines Reed-Solomon Erasure Coding parameters
|
||||
func ListenWithOptions(laddr string, block BlockCrypt, dataShards, parityShards int) (*Listener, error) {
|
||||
udpaddr, err := net.ResolveUDPAddr("udp", laddr)
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(err, "net.ResolveUDPAddr")
|
||||
}
|
||||
conn, err := net.ListenUDP("udp", udpaddr)
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(err, "net.ListenUDP")
|
||||
}
|
||||
|
||||
return ServeConn(block, dataShards, parityShards, conn)
|
||||
}
|
||||
|
||||
// ServeConn serves KCP protocol for a single packet connection.
|
||||
func ServeConn(block BlockCrypt, dataShards, parityShards int, conn net.PacketConn) (*Listener, error) {
|
||||
l := new(Listener)
|
||||
l.conn = conn
|
||||
l.sessions = make(map[sessionKey]*UDPSession)
|
||||
l.chAccepts = make(chan *UDPSession, acceptBacklog)
|
||||
l.chSessionClosed = make(chan sessionKey)
|
||||
l.die = make(chan struct{})
|
||||
l.dataShards = dataShards
|
||||
l.parityShards = parityShards
|
||||
l.block = block
|
||||
l.fecDecoder = newFECDecoder(rxFECMulti*(dataShards+parityShards), dataShards, parityShards)
|
||||
|
||||
// calculate header size
|
||||
if l.block != nil {
|
||||
l.headerSize += cryptHeaderSize
|
||||
}
|
||||
if l.fecDecoder != nil {
|
||||
l.headerSize += fecHeaderSizePlus2
|
||||
}
|
||||
|
||||
go l.monitor()
|
||||
return l, nil
|
||||
}
|
||||
|
||||
// Dial connects to the remote address "raddr" on the network "udp"
|
||||
func Dial(raddr string) (net.Conn, error) { return DialWithOptions(raddr, nil, 0, 0) }
|
||||
|
||||
// DialWithOptions connects to the remote address "raddr" on the network "udp" with packet encryption
|
||||
func DialWithOptions(raddr string, block BlockCrypt, dataShards, parityShards int) (*UDPSession, error) {
|
||||
udpaddr, err := net.ResolveUDPAddr("udp", raddr)
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(err, "net.ResolveUDPAddr")
|
||||
}
|
||||
|
||||
udpconn, err := net.DialUDP("udp", nil, udpaddr)
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(err, "net.DialUDP")
|
||||
}
|
||||
|
||||
return NewConn(raddr, block, dataShards, parityShards, &connectedUDPConn{udpconn})
|
||||
}
|
||||
|
||||
// NewConn establishes a session and talks KCP protocol over a packet connection.
|
||||
func NewConn(raddr string, block BlockCrypt, dataShards, parityShards int, conn net.PacketConn) (*UDPSession, error) {
|
||||
udpaddr, err := net.ResolveUDPAddr("udp", raddr)
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(err, "net.ResolveUDPAddr")
|
||||
}
|
||||
|
||||
var convid uint32
|
||||
binary.Read(rand.Reader, binary.LittleEndian, &convid)
|
||||
return newUDPSession(convid, dataShards, parityShards, nil, conn, udpaddr, block), nil
|
||||
}
|
||||
|
||||
func NewConnEx(convid uint32, connected bool, raddr string, block BlockCrypt, dataShards, parityShards int, conn *net.UDPConn) (*UDPSession, error) {
|
||||
udpaddr, err := net.ResolveUDPAddr("udp", raddr)
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(err, "net.ResolveUDPAddr")
|
||||
}
|
||||
|
||||
var pConn net.PacketConn = conn
|
||||
if connected {
|
||||
pConn = &connectedUDPConn{conn}
|
||||
}
|
||||
|
||||
return newUDPSession(convid, dataShards, parityShards, nil, pConn, udpaddr, block), nil
|
||||
}
|
||||
|
||||
// currentMs returns the current Unix time in milliseconds, truncated to the
// low 32 bits.
func currentMs() uint32 {
	millis := time.Now().UnixNano() / int64(time.Millisecond)
	return uint32(millis)
}
|
||||
|
||||
// connectedUDPConn wraps a net.UDPConn and turns WriteTo calls into Write
// calls, which the original author measured as 4 times faster on some OSes.
// It should only be used for connections produced by a net.Dial* call.
type connectedUDPConn struct {
	*net.UDPConn
}

// WriteTo ignores addr and sends b through the embedded connection's Write.
func (c *connectedUDPConn) WriteTo(b []byte, addr net.Addr) (int, error) {
	return c.UDPConn.Write(b)
}
|
164
vendor/github.com/fatedier/kcp-go/snmp.go
generated
vendored
Normal file
164
vendor/github.com/fatedier/kcp-go/snmp.go
generated
vendored
Normal file
@ -0,0 +1,164 @@
|
||||
package kcp
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"sync/atomic"
|
||||
)
|
||||
|
||||
// Snmp holds the network statistics counters.
type Snmp struct {
	BytesSent        uint64 // bytes sent from upper level
	BytesReceived    uint64 // bytes received to upper level
	MaxConn          uint64 // max number of connections ever reached
	ActiveOpens      uint64 // accumulated active open connections
	PassiveOpens     uint64 // accumulated passive open connections
	CurrEstab        uint64 // current number of established connections
	InErrs           uint64 // UDP read errors reported from net.PacketConn
	InCsumErrors     uint64 // checksum errors from CRC32
	KCPInErrors      uint64 // packet input errors reported from KCP
	InPkts           uint64 // incoming packets count
	OutPkts          uint64 // outgoing packets count
	InSegs           uint64 // incoming KCP segments
	OutSegs          uint64 // outgoing KCP segments
	InBytes          uint64 // UDP bytes received
	OutBytes         uint64 // UDP bytes sent
	RetransSegs      uint64 // accumulated retransmitted segments
	FastRetransSegs  uint64 // accumulated fast retransmitted segments
	EarlyRetransSegs uint64 // accumulated early retransmitted segments
	LostSegs         uint64 // number of segs inferred as lost
	RepeatSegs       uint64 // number of segs duplicated
	FECRecovered     uint64 // correct packets recovered from FEC
	FECErrs          uint64 // incorrect packets recovered from FEC
	FECParityShards  uint64 // FEC segments received
	FECShortShards   uint64 // number of data shards that's not enough for recovery
}

// newSnmp allocates a zeroed Snmp.
func newSnmp() *Snmp {
	return new(Snmp)
}

// Header returns the counter names, in the same order ToSlice reports values.
func (s *Snmp) Header() []string {
	return []string{
		"BytesSent",
		"BytesReceived",
		"MaxConn",
		"ActiveOpens",
		"PassiveOpens",
		"CurrEstab",
		"InErrs",
		"InCsumErrors",
		"KCPInErrors",
		"InPkts",
		"OutPkts",
		"InSegs",
		"OutSegs",
		"InBytes",
		"OutBytes",
		"RetransSegs",
		"FastRetransSegs",
		"EarlyRetransSegs",
		"LostSegs",
		"RepeatSegs",
		"FECParityShards",
		"FECErrs",
		"FECRecovered",
		"FECShortShards",
	}
}

// ToSlice returns the current counter values formatted as strings, in the
// order given by Header.
func (s *Snmp) ToSlice() []string {
	snap := s.Copy()
	values := [...]uint64{
		snap.BytesSent,
		snap.BytesReceived,
		snap.MaxConn,
		snap.ActiveOpens,
		snap.PassiveOpens,
		snap.CurrEstab,
		snap.InErrs,
		snap.InCsumErrors,
		snap.KCPInErrors,
		snap.InPkts,
		snap.OutPkts,
		snap.InSegs,
		snap.OutSegs,
		snap.InBytes,
		snap.OutBytes,
		snap.RetransSegs,
		snap.FastRetransSegs,
		snap.EarlyRetransSegs,
		snap.LostSegs,
		snap.RepeatSegs,
		snap.FECParityShards,
		snap.FECErrs,
		snap.FECRecovered,
		snap.FECShortShards,
	}

	out := make([]string, len(values))
	for i, v := range values {
		out[i] = fmt.Sprint(v)
	}
	return out
}

// Copy returns a new Snmp whose counters were each read with an atomic load.
func (s *Snmp) Copy() *Snmp {
	return &Snmp{
		BytesSent:        atomic.LoadUint64(&s.BytesSent),
		BytesReceived:    atomic.LoadUint64(&s.BytesReceived),
		MaxConn:          atomic.LoadUint64(&s.MaxConn),
		ActiveOpens:      atomic.LoadUint64(&s.ActiveOpens),
		PassiveOpens:     atomic.LoadUint64(&s.PassiveOpens),
		CurrEstab:        atomic.LoadUint64(&s.CurrEstab),
		InErrs:           atomic.LoadUint64(&s.InErrs),
		InCsumErrors:     atomic.LoadUint64(&s.InCsumErrors),
		KCPInErrors:      atomic.LoadUint64(&s.KCPInErrors),
		InPkts:           atomic.LoadUint64(&s.InPkts),
		OutPkts:          atomic.LoadUint64(&s.OutPkts),
		InSegs:           atomic.LoadUint64(&s.InSegs),
		OutSegs:          atomic.LoadUint64(&s.OutSegs),
		InBytes:          atomic.LoadUint64(&s.InBytes),
		OutBytes:         atomic.LoadUint64(&s.OutBytes),
		RetransSegs:      atomic.LoadUint64(&s.RetransSegs),
		FastRetransSegs:  atomic.LoadUint64(&s.FastRetransSegs),
		EarlyRetransSegs: atomic.LoadUint64(&s.EarlyRetransSegs),
		LostSegs:         atomic.LoadUint64(&s.LostSegs),
		RepeatSegs:       atomic.LoadUint64(&s.RepeatSegs),
		FECParityShards:  atomic.LoadUint64(&s.FECParityShards),
		FECErrs:          atomic.LoadUint64(&s.FECErrs),
		FECRecovered:     atomic.LoadUint64(&s.FECRecovered),
		FECShortShards:   atomic.LoadUint64(&s.FECShortShards),
	}
}

// Reset atomically stores zero into every counter.
func (s *Snmp) Reset() {
	counters := [...]*uint64{
		&s.BytesSent,
		&s.BytesReceived,
		&s.MaxConn,
		&s.ActiveOpens,
		&s.PassiveOpens,
		&s.CurrEstab,
		&s.InErrs,
		&s.InCsumErrors,
		&s.KCPInErrors,
		&s.InPkts,
		&s.OutPkts,
		&s.InSegs,
		&s.OutSegs,
		&s.InBytes,
		&s.OutBytes,
		&s.RetransSegs,
		&s.FastRetransSegs,
		&s.EarlyRetransSegs,
		&s.LostSegs,
		&s.RepeatSegs,
		&s.FECParityShards,
		&s.FECErrs,
		&s.FECRecovered,
		&s.FECShortShards,
	}
	for _, c := range counters {
		atomic.StoreUint64(c, 0)
	}
}

// DefaultSnmp is the global KCP connection statistics collector
var DefaultSnmp *Snmp

func init() {
	DefaultSnmp = newSnmp()
}
|
105
vendor/github.com/fatedier/kcp-go/updater.go
generated
vendored
Normal file
105
vendor/github.com/fatedier/kcp-go/updater.go
generated
vendored
Normal file
@ -0,0 +1,105 @@
|
||||
package kcp
|
||||
|
||||
import (
|
||||
"container/heap"
|
||||
"sync"
|
||||
"time"
|
||||
)
|
||||
|
||||
var updater updateHeap
|
||||
|
||||
func init() {
|
||||
updater.init()
|
||||
go updater.updateTask()
|
||||
}
|
||||
|
||||
// entry contains a session update info
|
||||
type entry struct {
|
||||
ts time.Time
|
||||
s *UDPSession
|
||||
}
|
||||
|
||||
// a global heap managed kcp.flush() caller
|
||||
type updateHeap struct {
|
||||
entries []entry
|
||||
mu sync.Mutex
|
||||
chWakeUp chan struct{}
|
||||
}
|
||||
|
||||
func (h *updateHeap) Len() int { return len(h.entries) }
|
||||
func (h *updateHeap) Less(i, j int) bool { return h.entries[i].ts.Before(h.entries[j].ts) }
|
||||
func (h *updateHeap) Swap(i, j int) {
|
||||
h.entries[i], h.entries[j] = h.entries[j], h.entries[i]
|
||||
h.entries[i].s.updaterIdx = i
|
||||
h.entries[j].s.updaterIdx = j
|
||||
}
|
||||
|
||||
func (h *updateHeap) Push(x interface{}) {
|
||||
h.entries = append(h.entries, x.(entry))
|
||||
n := len(h.entries)
|
||||
h.entries[n-1].s.updaterIdx = n - 1
|
||||
}
|
||||
|
||||
func (h *updateHeap) Pop() interface{} {
|
||||
n := len(h.entries)
|
||||
x := h.entries[n-1]
|
||||
h.entries[n-1].s.updaterIdx = -1
|
||||
h.entries[n-1] = entry{} // manual set nil for GC
|
||||
h.entries = h.entries[0 : n-1]
|
||||
return x
|
||||
}
|
||||
|
||||
func (h *updateHeap) init() {
|
||||
h.chWakeUp = make(chan struct{}, 1)
|
||||
}
|
||||
|
||||
func (h *updateHeap) addSession(s *UDPSession) {
|
||||
h.mu.Lock()
|
||||
heap.Push(h, entry{time.Now(), s})
|
||||
h.mu.Unlock()
|
||||
h.wakeup()
|
||||
}
|
||||
|
||||
func (h *updateHeap) removeSession(s *UDPSession) {
|
||||
h.mu.Lock()
|
||||
if s.updaterIdx != -1 {
|
||||
heap.Remove(h, s.updaterIdx)
|
||||
}
|
||||
h.mu.Unlock()
|
||||
}
|
||||
|
||||
func (h *updateHeap) wakeup() {
|
||||
select {
|
||||
case h.chWakeUp <- struct{}{}:
|
||||
default:
|
||||
}
|
||||
}
|
||||
|
||||
func (h *updateHeap) updateTask() {
|
||||
var timer <-chan time.Time
|
||||
for {
|
||||
select {
|
||||
case <-timer:
|
||||
case <-h.chWakeUp:
|
||||
}
|
||||
|
||||
h.mu.Lock()
|
||||
hlen := h.Len()
|
||||
now := time.Now()
|
||||
for i := 0; i < hlen; i++ {
|
||||
entry := heap.Pop(h).(entry)
|
||||
if now.After(entry.ts) {
|
||||
entry.ts = now.Add(entry.s.update())
|
||||
heap.Push(h, entry)
|
||||
} else {
|
||||
heap.Push(h, entry)
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
if hlen > 0 {
|
||||
timer = time.After(h.entries[0].ts.Sub(now))
|
||||
}
|
||||
h.mu.Unlock()
|
||||
}
|
||||
}
|
110
vendor/github.com/fatedier/kcp-go/xor.go
generated
vendored
Normal file
110
vendor/github.com/fatedier/kcp-go/xor.go
generated
vendored
Normal file
@ -0,0 +1,110 @@
|
||||
// Copyright 2013 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package kcp
|
||||
|
||||
import (
|
||||
"runtime"
|
||||
"unsafe"
|
||||
)
|
||||
|
||||
// wordSize is the size in bytes of a uintptr on the target architecture.
const wordSize = int(unsafe.Sizeof(uintptr(0)))

// supportsUnaligned is true on the architectures listed here, for which the
// word-at-a-time path is used.
const supportsUnaligned = runtime.GOARCH == "386" || runtime.GOARCH == "amd64" || runtime.GOARCH == "ppc64" || runtime.GOARCH == "ppc64le" || runtime.GOARCH == "s390x"

// fastXORBytes xors in bulk. It only works on architectures that
// support unaligned read/writes. It returns min(len(a), len(b)).
func fastXORBytes(dst, a, b []byte) int {
	n := len(b)
	if len(a) < n {
		n = len(a)
	}

	// Whole words go through fastXORWords; the remainder is done bytewise.
	whole := n - n%wordSize
	if whole > 0 {
		fastXORWords(dst[:whole], a[:whole], b[:whole])
	}
	for i := whole; i < n; i++ {
		dst[i] = a[i] ^ b[i]
	}
	return n
}

// safeXORBytes xors a and b into dst one byte at a time, with the main loop
// unrolled eight bytes per iteration. It returns min(len(a), len(b)).
func safeXORBytes(dst, a, b []byte) int {
	n := len(b)
	if len(a) < n {
		n = len(a)
	}

	// Handle the n%8 leading bytes first so the rest is a multiple of eight.
	i := 0
	for lead := n % 8; i < lead; i++ {
		dst[i] = a[i] ^ b[i]
	}

	for ; i < n; i += 8 {
		d, x, y := dst[i:i+8], a[i:i+8], b[i:i+8]
		d[0] = x[0] ^ y[0]
		d[1] = x[1] ^ y[1]
		d[2] = x[2] ^ y[2]
		d[3] = x[3] ^ y[3]
		d[4] = x[4] ^ y[4]
		d[5] = x[5] ^ y[5]
		d[6] = x[6] ^ y[6]
		d[7] = x[7] ^ y[7]
	}
	return n
}

// xorBytes xors the bytes in a and b. The destination is assumed to have enough
// space. Returns the number of bytes xor'd.
func xorBytes(dst, a, b []byte) int {
	if !supportsUnaligned {
		// TODO(hanwen): if (dst, a, b) have common alignment
		// we could still try fastXORBytes. It is not clear
		// how often this happens, and it's only worth it if
		// the block encryption itself is hardware
		// accelerated.
		return safeXORBytes(dst, a, b)
	}
	return fastXORBytes(dst, a, b)
}

// fastXORWords XORs multiples of 4 or 8 bytes (depending on architecture.)
// The arguments are assumed to be of equal length.
func fastXORWords(dst, a, b []byte) {
	// Reinterpret the byte slices as word slices. The reinterpreted lengths
	// still count bytes, so indexing is bounded explicitly by words below.
	dstW := *(*[]uintptr)(unsafe.Pointer(&dst))
	aW := *(*[]uintptr)(unsafe.Pointer(&a))
	bW := *(*[]uintptr)(unsafe.Pointer(&b))
	words := len(b) / wordSize

	i := 0
	for lead := words % 8; i < lead; i++ {
		dstW[i] = aW[i] ^ bW[i]
	}

	for ; i < words; i += 8 {
		d, x, y := dstW[i:i+8], aW[i:i+8], bW[i:i+8]
		d[0] = x[0] ^ y[0]
		d[1] = x[1] ^ y[1]
		d[2] = x[2] ^ y[2]
		d[3] = x[3] ^ y[3]
		d[4] = x[4] ^ y[4]
		d[5] = x[5] ^ y[5]
		d[6] = x[6] ^ y[6]
		d[7] = x[7] ^ y[7]
	}
}

// xorWords xors a and b into dst, using the word-wise path where unaligned
// access is supported and the bytewise path otherwise.
func xorWords(dst, a, b []byte) {
	if !supportsUnaligned {
		safeXORBytes(dst, a, b)
		return
	}
	fastXORWords(dst, a, b)
}
|
14
vendor/github.com/templexxx/cpufeat/.gitignore
generated
vendored
Normal file
14
vendor/github.com/templexxx/cpufeat/.gitignore
generated
vendored
Normal file
@ -0,0 +1,14 @@
|
||||
# Binaries for programs and plugins
|
||||
*.exe
|
||||
*.dll
|
||||
*.so
|
||||
*.dylib
|
||||
|
||||
# Test binary, build with `go test -c`
|
||||
*.test
|
||||
|
||||
# Output of the go coverage tool, specifically when used with LiteIDE
|
||||
*.out
|
||||
|
||||
# Project-local glide cache, RE: https://github.com/Masterminds/glide/issues/736
|
||||
.glide/
|
27
vendor/github.com/templexxx/cpufeat/LICENSE
generated
vendored
Normal file
27
vendor/github.com/templexxx/cpufeat/LICENSE
generated
vendored
Normal file
@ -0,0 +1,27 @@
|
||||
Copyright (c) 2009 The Go Authors. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above
|
||||
copyright notice, this list of conditions and the following disclaimer
|
||||
in the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Google Inc. nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
32
vendor/github.com/templexxx/cpufeat/cpu.go
generated
vendored
Normal file
32
vendor/github.com/templexxx/cpufeat/cpu.go
generated
vendored
Normal file
@ -0,0 +1,32 @@
|
||||
// Copyright 2017 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// Package cpufeat implements processor feature detection
|
||||
// used by the Go standard library.
|
||||
package cpufeat
|
||||
|
||||
var X86 x86
|
||||
|
||||
// The booleans in x86 contain the correspondingly named cpuid feature bit.
|
||||
// HasAVX and HasAVX2 are only set if the OS does support XMM and YMM registers
|
||||
// in addition to the cpuid feature bit being set.
|
||||
// The struct is padded to avoid false sharing.
|
||||
type x86 struct {
|
||||
_ [CacheLineSize]byte
|
||||
HasAES bool
|
||||
HasAVX bool
|
||||
HasAVX2 bool
|
||||
HasBMI1 bool
|
||||
HasBMI2 bool
|
||||
HasERMS bool
|
||||
HasOSXSAVE bool
|
||||
HasPCLMULQDQ bool
|
||||
HasPOPCNT bool
|
||||
HasSSE2 bool
|
||||
HasSSE3 bool
|
||||
HasSSSE3 bool
|
||||
HasSSE41 bool
|
||||
HasSSE42 bool
|
||||
_ [CacheLineSize]byte
|
||||
}
|
7
vendor/github.com/templexxx/cpufeat/cpu_arm.go
generated
vendored
Normal file
7
vendor/github.com/templexxx/cpufeat/cpu_arm.go
generated
vendored
Normal file
@ -0,0 +1,7 @@
|
||||
// Copyright 2017 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package cpufeat
|
||||
|
||||
const CacheLineSize = 32
|
7
vendor/github.com/templexxx/cpufeat/cpu_arm64.go
generated
vendored
Normal file
7
vendor/github.com/templexxx/cpufeat/cpu_arm64.go
generated
vendored
Normal file
@ -0,0 +1,7 @@
|
||||
// Copyright 2017 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package cpufeat
|
||||
|
||||
const CacheLineSize = 32
|
7
vendor/github.com/templexxx/cpufeat/cpu_mips.go
generated
vendored
Normal file
7
vendor/github.com/templexxx/cpufeat/cpu_mips.go
generated
vendored
Normal file
@ -0,0 +1,7 @@
|
||||
// Copyright 2017 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package cpufeat
|
||||
|
||||
const CacheLineSize = 32
|
7
vendor/github.com/templexxx/cpufeat/cpu_mips64.go
generated
vendored
Normal file
7
vendor/github.com/templexxx/cpufeat/cpu_mips64.go
generated
vendored
Normal file
@ -0,0 +1,7 @@
|
||||
// Copyright 2017 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package cpufeat
|
||||
|
||||
const CacheLineSize = 32
|
7
vendor/github.com/templexxx/cpufeat/cpu_mips64le.go
generated
vendored
Normal file
7
vendor/github.com/templexxx/cpufeat/cpu_mips64le.go
generated
vendored
Normal file
@ -0,0 +1,7 @@
|
||||
// Copyright 2017 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package cpufeat
|
||||
|
||||
const CacheLineSize = 32
|
7
vendor/github.com/templexxx/cpufeat/cpu_mipsle.go
generated
vendored
Normal file
7
vendor/github.com/templexxx/cpufeat/cpu_mipsle.go
generated
vendored
Normal file
@ -0,0 +1,7 @@
|
||||
// Copyright 2017 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package cpufeat
|
||||
|
||||
const CacheLineSize = 32
|
7
vendor/github.com/templexxx/cpufeat/cpu_ppc64.go
generated
vendored
Normal file
7
vendor/github.com/templexxx/cpufeat/cpu_ppc64.go
generated
vendored
Normal file
@ -0,0 +1,7 @@
|
||||
// Copyright 2017 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package cpufeat
|
||||
|
||||
const CacheLineSize = 128
|
7
vendor/github.com/templexxx/cpufeat/cpu_ppc64le.go
generated
vendored
Normal file
7
vendor/github.com/templexxx/cpufeat/cpu_ppc64le.go
generated
vendored
Normal file
@ -0,0 +1,7 @@
|
||||
// Copyright 2017 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package cpufeat
|
||||
|
||||
const CacheLineSize = 128
|
7
vendor/github.com/templexxx/cpufeat/cpu_s390x.go
generated
vendored
Normal file
7
vendor/github.com/templexxx/cpufeat/cpu_s390x.go
generated
vendored
Normal file
@ -0,0 +1,7 @@
|
||||
// Copyright 2017 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package cpufeat
|
||||
|
||||
const CacheLineSize = 256
|
59
vendor/github.com/templexxx/cpufeat/cpu_x86.go
generated
vendored
Normal file
59
vendor/github.com/templexxx/cpufeat/cpu_x86.go
generated
vendored
Normal file
@ -0,0 +1,59 @@
|
||||
// Copyright 2017 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// +build 386 amd64 amd64p32
|
||||
|
||||
package cpufeat
|
||||
|
||||
const CacheLineSize = 64
|
||||
|
||||
// cpuid is implemented in cpu_x86.s.
|
||||
func cpuid(eaxArg, ecxArg uint32) (eax, ebx, ecx, edx uint32)
|
||||
|
||||
// xgetbv with ecx = 0 is implemented in cpu_x86.s.
|
||||
func xgetbv() (eax, edx uint32)
|
||||
|
||||
func init() {
|
||||
maxId, _, _, _ := cpuid(0, 0)
|
||||
|
||||
if maxId < 1 {
|
||||
return
|
||||
}
|
||||
|
||||
_, _, ecx1, edx1 := cpuid(1, 0)
|
||||
X86.HasSSE2 = isSet(26, edx1)
|
||||
|
||||
X86.HasSSE3 = isSet(0, ecx1)
|
||||
X86.HasPCLMULQDQ = isSet(1, ecx1)
|
||||
X86.HasSSSE3 = isSet(9, ecx1)
|
||||
X86.HasSSE41 = isSet(19, ecx1)
|
||||
X86.HasSSE42 = isSet(20, ecx1)
|
||||
X86.HasPOPCNT = isSet(23, ecx1)
|
||||
X86.HasAES = isSet(25, ecx1)
|
||||
X86.HasOSXSAVE = isSet(27, ecx1)
|
||||
|
||||
osSupportsAVX := false
|
||||
// For XGETBV, OSXSAVE bit is required and sufficient.
|
||||
if X86.HasOSXSAVE {
|
||||
eax, _ := xgetbv()
|
||||
// Check if XMM and YMM registers have OS support.
|
||||
osSupportsAVX = isSet(1, eax) && isSet(2, eax)
|
||||
}
|
||||
|
||||
X86.HasAVX = isSet(28, ecx1) && osSupportsAVX
|
||||
|
||||
if maxId < 7 {
|
||||
return
|
||||
}
|
||||
|
||||
_, ebx7, _, _ := cpuid(7, 0)
|
||||
X86.HasBMI1 = isSet(3, ebx7)
|
||||
X86.HasAVX2 = isSet(5, ebx7) && osSupportsAVX
|
||||
X86.HasBMI2 = isSet(8, ebx7)
|
||||
X86.HasERMS = isSet(9, ebx7)
|
||||
}
|
||||
|
||||
// isSet reports whether bit number bitpos (0 = least significant) of value
// is 1. Positions of 32 and above always report false.
func isSet(bitpos uint, value uint32) bool {
	return (value>>bitpos)&1 == 1
}
|
32
vendor/github.com/templexxx/cpufeat/cpu_x86.s
generated
vendored
Normal file
32
vendor/github.com/templexxx/cpufeat/cpu_x86.s
generated
vendored
Normal file
@ -0,0 +1,32 @@
|
||||
// Copyright 2017 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// +build 386 amd64 amd64p32
|
||||
|
||||
#include "textflag.h"
|
||||
|
||||
// func cpuid(eaxArg, ecxArg uint32) (eax, ebx, ecx, edx uint32)
|
||||
TEXT ·cpuid(SB), NOSPLIT, $0-24
|
||||
MOVL eaxArg+0(FP), AX
|
||||
MOVL ecxArg+4(FP), CX
|
||||
CPUID
|
||||
MOVL AX, eax+8(FP)
|
||||
MOVL BX, ebx+12(FP)
|
||||
MOVL CX, ecx+16(FP)
|
||||
MOVL DX, edx+20(FP)
|
||||
RET
|
||||
|
||||
// func xgetbv() (eax, edx uint32)
|
||||
TEXT ·xgetbv(SB),NOSPLIT,$0-8
|
||||
#ifdef GOOS_nacl
|
||||
// nacl does not support XGETBV.
|
||||
MOVL $0, eax+0(FP)
|
||||
MOVL $0, edx+4(FP)
|
||||
#else
|
||||
MOVL $0, CX
|
||||
WORD $0x010f; BYTE $0xd0 //XGETBV
|
||||
MOVL AX, eax+0(FP)
|
||||
MOVL DX, edx+4(FP)
|
||||
#endif
|
||||
RET
|
40
vendor/github.com/templexxx/reedsolomon/.gitignore
generated
vendored
Normal file
40
vendor/github.com/templexxx/reedsolomon/.gitignore
generated
vendored
Normal file
@ -0,0 +1,40 @@
|
||||
# Compiled Object files, Static and Dynamic libs (Shared Objects)
|
||||
*.o
|
||||
*.a
|
||||
*.so
|
||||
|
||||
# Folders
|
||||
_obj
|
||||
_test
|
||||
|
||||
# Architecture specific extensions/prefixes
|
||||
*.[568vq]
|
||||
[568vq].out
|
||||
|
||||
*.cgo1.go
|
||||
*.cgo2.c
|
||||
_cgo_defun.c
|
||||
_cgo_gotypes.go
|
||||
_cgo_export.*
|
||||
|
||||
_testmain.go
|
||||
|
||||
*.exe
|
||||
*.test
|
||||
*.prof
|
||||
/.idea
|
||||
/backup
|
||||
/loopunroll/
|
||||
cpu.out
|
||||
mathtool/galois/
|
||||
mathtool/matrix/
|
||||
mem.out
|
||||
/examples/
|
||||
/.DS_Store
|
||||
/mathtool/cntinverse
|
||||
/invert
|
||||
/bakcup
|
||||
/buf.svg
|
||||
*.svg
|
||||
*.out
|
||||
/escape
|
9
vendor/github.com/templexxx/reedsolomon/.travis.yml
generated
vendored
Normal file
9
vendor/github.com/templexxx/reedsolomon/.travis.yml
generated
vendored
Normal file
@ -0,0 +1,9 @@
|
||||
language: go
|
||||
go:
|
||||
- 1.9
|
||||
|
||||
install:
|
||||
- go get github.com/templexxx/reedsolomon
|
||||
|
||||
script:
|
||||
- go test -v
|
23
vendor/github.com/templexxx/reedsolomon/LICENSE
generated
vendored
Normal file
23
vendor/github.com/templexxx/reedsolomon/LICENSE
generated
vendored
Normal file
@ -0,0 +1,23 @@
|
||||
MIT License
|
||||
|
||||
Copyright (c) 2017 Templexxx
|
||||
Copyright (c) 2015 Klaus Post
|
||||
Copyright (c) 2015 Backblaze
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
109
vendor/github.com/templexxx/reedsolomon/README.md
generated
vendored
Normal file
109
vendor/github.com/templexxx/reedsolomon/README.md
generated
vendored
Normal file
@ -0,0 +1,109 @@
|
||||
# Reed-Solomon
|
||||
|
||||
[![GoDoc][1]][2] [![MIT licensed][3]][4] [![Build Status][5]][6] [![Go Report Card][7]][8]
|
||||
|
||||
[1]: https://godoc.org/github.com/templexxx/reedsolomon?status.svg
|
||||
[2]: https://godoc.org/github.com/templexxx/reedsolomon
|
||||
[3]: https://img.shields.io/badge/license-MIT-blue.svg
|
||||
[4]: LICENSE
|
||||
[5]: https://travis-ci.org/templexxx/reedsolomon.svg?branch=master
|
||||
[6]: https://travis-ci.org/templexxx/reedsolomon
|
||||
[7]: https://goreportcard.com/badge/github.com/templexxx/reedsolomon
|
||||
[8]: https://goreportcard.com/report/github.com/templexxx/reedsolomon
|
||||
|
||||
|
||||
## Introduction:
|
||||
1. Reed-Solomon Erasure Code engine in pure Go.
|
||||
2. Super Fast: more than 10GB/s per physical core ( 10+4, 4KB per vector, Macbook Pro 2.8 GHz Intel Core i7 )
|
||||
|
||||
## Installation
|
||||
To get the package use the standard:
|
||||
```bash
|
||||
go get github.com/templexxx/reedsolomon
|
||||
```
|
||||
|
||||
## Documentation
|
||||
See the associated [GoDoc](http://godoc.org/github.com/templexxx/reedsolomon)
|
||||
|
||||
## Specification
|
||||
### GOARCH
|
||||
1. All arch are supported
|
||||
2. Version 0.1.0 needs Go 1.9 for sync.Map on AMD64
|
||||
|
||||
### Math
|
||||
1. Coding over in GF(2^8)
|
||||
2. Primitive Polynomial: x^8 + x^4 + x^3 + x^2 + 1 (0x1d)
|
||||
3. mathtool/gentbls.go: generates the primitive polynomial and its log table, exp table, multiply table, inverse table, etc. It shows in more detail how the Galois field works
|
||||
4. mathtool/cntinverse.go : calculate how many inverse matrix will have in different RS codes config
|
||||
5. Both Cauchy and Vandermonde matrices are supported. Vandermonde needs more operations to preserve the property that any square subset of rows is invertible
|
||||
|
||||
### Why so fast?
|
||||
These three parts will cost too much time:
|
||||
|
||||
1. lookup galois-field tables
|
||||
2. read/write memory
|
||||
3. calculate inverse matrix in the reconstruct process
|
||||
|
||||
SIMD will solve no.1
|
||||
|
||||
Cache-friendly code helps with no.2 & no.3. In addition, a sync.Map is used to cache inverse matrices, which saves about 1000ns when the same matrix is needed again.
|
||||
|
||||
## Performance
|
||||
|
||||
Performance depends mainly on:
|
||||
|
||||
1. CPU instruction extension( AVX2 or SSSE3 or none )
|
||||
2. number of data/parity vects
|
||||
3. unit size of calculation ( see it in rs_amd64.go )
|
||||
4. size of shards
|
||||
5. speed of memory (waste so much time on read/write mem, :D )
|
||||
6. performance of CPU
|
||||
7. the way of using ( reuse memory)
|
||||
|
||||
And we must know the benchmark test is quite different with encoding/decoding in practice.
|
||||
|
||||
Because in benchmark test loops, the CPU Cache will help a lot. In practice, we must reuse the memory to make the performance become as good as the benchmark test.
|
||||
|
||||
Example of performance on my MacBook 2017 i7 2.8GHz. 10+4 (with 0.1.0).
|
||||
|
||||
### Encoding:
|
||||
|
||||
| Vector size | Speed (MB/S) |
|
||||
|----------------|--------------|
|
||||
| 1400B | 7655.02 |
|
||||
| 4KB | 10551.37 |
|
||||
| 64KB | 9297.25 |
|
||||
| 1MB | 6829.89 |
|
||||
| 16MB | 6312.83 |
|
||||
|
||||
### Reconstruct (use nil to point which one need repair):
|
||||
|
||||
| Vector size | Speed (MB/S) |
|
||||
|----------------|--------------|
|
||||
| 1400B | 4124.85 |
|
||||
| 4KB | 5715.45 |
|
||||
| 64KB | 6050.06 |
|
||||
| 1MB | 5001.21 |
|
||||
| 16MB | 5043.04 |
|
||||
|
||||
### ReconstructWithPos (use a position list to point which one need repair, reuse the memory):
|
||||
|
||||
| Vector size | Speed (MB/S) |
|
||||
|----------------|--------------|
|
||||
| 1400B | 6170.24 |
|
||||
| 4KB | 9444.86 |
|
||||
| 64KB | 9311.30 |
|
||||
| 1MB | 6781.06 |
|
||||
| 16MB | 6285.34 |
|
||||
|
||||
**reconstruct benchmark tests here run with inverse matrix cache, if there is no cache, it will cost more time( about 1000ns)**
|
||||
|
||||
## Who is using this?
|
||||
|
||||
1. https://github.com/xtaci/kcp-go -- A Production-Grade Reliable-UDP Library for golang
|
||||
|
||||
## Links & Thanks
|
||||
* [Klauspost ReedSolomon](https://github.com/klauspost/reedsolomon)
|
||||
* [intel ISA-L](https://github.com/01org/isa-l)
|
||||
* [GF SIMD](http://www.ssrc.ucsc.edu/papers/plank-fast13.pdf)
|
||||
* [asm2plan9s](https://github.com/fwessels/asm2plan9s)
|
156
vendor/github.com/templexxx/reedsolomon/matrix.go
generated
vendored
Normal file
156
vendor/github.com/templexxx/reedsolomon/matrix.go
generated
vendored
Normal file
@ -0,0 +1,156 @@
|
||||
package reedsolomon
|
||||
|
||||
import "errors"
|
||||
|
||||
type matrix []byte
|
||||
|
||||
func genEncMatrixCauchy(d, p int) matrix {
|
||||
t := d + p
|
||||
m := make([]byte, t*d)
|
||||
for i := 0; i < d; i++ {
|
||||
m[i*d+i] = byte(1)
|
||||
}
|
||||
|
||||
d2 := d * d
|
||||
for i := d; i < t; i++ {
|
||||
for j := 0; j < d; j++ {
|
||||
d := i ^ j
|
||||
a := inverseTbl[d]
|
||||
m[d2] = byte(a)
|
||||
d2++
|
||||
}
|
||||
}
|
||||
return m
|
||||
}
|
||||
|
||||
func gfExp(b byte, n int) byte {
|
||||
if n == 0 {
|
||||
return 1
|
||||
}
|
||||
if b == 0 {
|
||||
return 0
|
||||
}
|
||||
a := logTbl[b]
|
||||
ret := int(a) * n
|
||||
for ret >= 255 {
|
||||
ret -= 255
|
||||
}
|
||||
return byte(expTbl[ret])
|
||||
}
|
||||
|
||||
func genVandMatrix(vm []byte, t, d int) {
|
||||
for i := 0; i < t; i++ {
|
||||
for j := 0; j < d; j++ {
|
||||
vm[i*d+j] = gfExp(byte(i), j)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (m matrix) mul(right matrix, rows, cols int, r []byte) {
|
||||
for i := 0; i < rows; i++ {
|
||||
for j := 0; j < cols; j++ {
|
||||
var v byte
|
||||
for k := 0; k < cols; k++ {
|
||||
v ^= gfMul(m[i*cols+k], right[k*cols+j])
|
||||
}
|
||||
r[i*cols+j] = v
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func genEncMatrixVand(d, p int) (matrix, error) {
|
||||
t := d + p
|
||||
buf := make([]byte, (2*t+4*d)*d)
|
||||
vm := buf[:t*d]
|
||||
genVandMatrix(vm, t, d)
|
||||
top := buf[t*d : (t+d)*d]
|
||||
copy(top, vm[:d*d])
|
||||
raw := buf[(t+d)*d : (t+3*d)*d]
|
||||
im := buf[(t+3*d)*d : (t+4*d)*d]
|
||||
err := matrix(top).invert(raw, d, im)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
r := buf[(t+4*d)*d : (2*t+4*d)*d]
|
||||
matrix(vm).mul(im, t, d, r)
|
||||
return matrix(r), nil
|
||||
}
|
||||
|
||||
// [I|m'] -> [m']
|
||||
func (m matrix) subMatrix(n int, r []byte) {
|
||||
for i := 0; i < n; i++ {
|
||||
off := i * n
|
||||
copy(r[off:off+n], m[2*off+n:2*(off+n)])
|
||||
}
|
||||
}
|
||||
|
||||
func (m matrix) invert(raw matrix, n int, im []byte) error {
|
||||
// [m] -> [m|I]
|
||||
for i := 0; i < n; i++ {
|
||||
t := i * n
|
||||
copy(raw[2*t:2*t+n], m[t:t+n])
|
||||
raw[2*t+i+n] = byte(1)
|
||||
}
|
||||
err := gauss(raw, n)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
raw.subMatrix(n, im)
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m matrix) swap(i, j, n int) {
|
||||
for k := 0; k < n; k++ {
|
||||
m[i*n+k], m[j*n+k] = m[j*n+k], m[i*n+k]
|
||||
}
|
||||
}
|
||||
|
||||
func gfMul(a, b byte) byte {
|
||||
return mulTbl[a][b]
|
||||
}
|
||||
|
||||
var errSingular = errors.New("rs.invert: matrix is singular")
|
||||
|
||||
// [m|I] -> [I|m']
|
||||
func gauss(m matrix, n int) error {
|
||||
n2 := 2 * n
|
||||
for i := 0; i < n; i++ {
|
||||
if m[i*n2+i] == 0 {
|
||||
for j := i + 1; j < n; j++ {
|
||||
if m[j*n2+i] != 0 {
|
||||
m.swap(i, j, n2)
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
if m[i*n2+i] == 0 {
|
||||
return errSingular
|
||||
}
|
||||
if m[i*n2+i] != 1 {
|
||||
d := m[i*n2+i]
|
||||
scale := inverseTbl[d]
|
||||
for c := 0; c < n2; c++ {
|
||||
m[i*n2+c] = gfMul(m[i*n2+c], scale)
|
||||
}
|
||||
}
|
||||
for j := i + 1; j < n; j++ {
|
||||
if m[j*n2+i] != 0 {
|
||||
scale := m[j*n2+i]
|
||||
for c := 0; c < n2; c++ {
|
||||
m[j*n2+c] ^= gfMul(scale, m[i*n2+c])
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
for k := 0; k < n; k++ {
|
||||
for j := 0; j < k; j++ {
|
||||
if m[j*n2+k] != 0 {
|
||||
scale := m[j*n2+k]
|
||||
for c := 0; c < n2; c++ {
|
||||
m[j*n2+c] ^= gfMul(scale, m[k*n2+c])
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
280
vendor/github.com/templexxx/reedsolomon/rs.go
generated
vendored
Normal file
280
vendor/github.com/templexxx/reedsolomon/rs.go
generated
vendored
Normal file
@ -0,0 +1,280 @@
|
||||
/*
|
||||
Reed-Solomon Codes over GF(2^8)
|
||||
Primitive Polynomial: x^8+x^4+x^3+x^2+1
|
||||
Galois Field arithmetic using Intel SIMD instructions (AVX2 or SSSE3)
|
||||
*/
|
||||
|
||||
package reedsolomon
|
||||
|
||||
import "errors"
|
||||
|
||||
// Encoder is the interface for Reed-Solomon encoding and reconstruction.
type Encoder interface {
	// Encode multiplies the generator matrix with the data vects.
	// len(vects) must equal the number of data+parity vects.
	Encode(vects [][]byte) error

	// For all reconstruct methods the result is written back to the
	// original position in vects: if vects[0] was lost, the repaired
	// content is back in vects[0] afterwards.

	// Reconstruct repairs lost data and parity vects.
	// Mark a lost vect by setting it to nil.
	Reconstruct(vects [][]byte) error

	// ReconstructData repairs lost data vects only.
	// Mark a lost vect by setting it to nil.
	ReconstructData(vects [][]byte) error

	// ReconstWithPos repairs lost data and parity vects, given the
	// positions of the vects that are present and of those that are lost.
	// Compared with Reconstruct it saves bandwidth and disk I/O when fewer
	// vects are lost than there are parity vects. With erasure codes the
	// caller must know which vects are broken, so this API is necessary.
	//
	// len(has) must equal the number of data vects.
	//
	// Example: in 3+2 the positions are [0,1,2,3,4]. If vects[0] is lost,
	// "has" could be [1,2,3] or [1,2,4] or ...; with has = [1,2,3] the
	// caller must be sure vects[1], vects[2] and vects[3] hold correct
	// data, and "dLost" is [0].
	//
	// Notes:
	//  1. the position lists must be in increasing order
	//     (TODO support out-of-order)
	//  2. every vect has the same length; do not set lost vects to nil,
	//     so no slice has to be allocated
	ReconstWithPos(vects [][]byte, has, dLost, pLost []int) error

	// ReconstDataWithPos repairs lost data vects only, given the positions
	// of the surviving and lost vects. Positions of lost parity vects do
	// not need to be supplied.
	ReconstDataWithPos(vects [][]byte, has, dLost []int) error
}
|
||||
|
||||
// checkCfg validates an encoder configuration: both the data count d and
// the parity count p must be positive, and d+p must stay below 256.
func checkCfg(d, p int) error {
	switch {
	case d <= 0 || p <= 0:
		return errors.New("rs.New: data or parity <= 0")
	case d+p >= 256:
		return errors.New("rs.New: data+parity >= 256")
	}
	return nil
}
|
||||
|
||||
// New create an Encoder (vandermonde matrix as Encoding matrix)
|
||||
func New(data, parity int) (enc Encoder, err error) {
|
||||
err = checkCfg(data, parity)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
e, err := genEncMatrixVand(data, parity)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
return newRS(data, parity, e), nil
|
||||
}
|
||||
|
||||
// NewCauchy create an Encoder (cauchy matrix as Generator Matrix)
|
||||
func NewCauchy(data, parity int) (enc Encoder, err error) {
|
||||
err = checkCfg(data, parity)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
e := genEncMatrixCauchy(data, parity)
|
||||
return newRS(data, parity, e), nil
|
||||
}
|
||||
|
||||
// encBase is the Encoder implementation that uses plain table lookups
// (mulVect/mulVectAdd) instead of SIMD kernels.
type encBase struct {
	data, parity int // number of data vects and of parity vects

	// encode is the full encoding matrix and gen the generator (parity)
	// part of it; both are row-major with `data` columns.
	encode, gen []byte
}
|
||||
|
||||
// checkEnc validates vs against a d+p layout and returns the common length
// of the vects.
//
// It fails when len(vs) != d+p, when there are no vects or the first vect is
// empty, or when the vects do not all have the same length. On failure the
// returned size is always 0.
func checkEnc(d, p int, vs [][]byte) (size int, err error) {
	total := len(vs)
	if d+p != total {
		return 0, errors.New("rs.checkER: vects not match rs args")
	}
	// Guard total == 0 so vs[0] is never indexed on an empty slice.
	if total == 0 || len(vs[0]) == 0 {
		return 0, errors.New("rs.checkER: vects size = 0")
	}
	size = len(vs[0])
	for _, v := range vs[1:] {
		if len(v) != size {
			return 0, errors.New("rs.checkER: vects size mismatch")
		}
	}
	return size, nil
}
|
||||
|
||||
func (e *encBase) Encode(vects [][]byte) (err error) {
|
||||
d := e.data
|
||||
p := e.parity
|
||||
_, err = checkEnc(d, p, vects)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
dv := vects[:d]
|
||||
pv := vects[d:]
|
||||
g := e.gen
|
||||
for i := 0; i < d; i++ {
|
||||
for j := 0; j < p; j++ {
|
||||
if i != 0 {
|
||||
mulVectAdd(g[j*d+i], dv[i], pv[j])
|
||||
} else {
|
||||
mulVect(g[j*d], dv[0], pv[j])
|
||||
}
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
func mulVect(c byte, a, b []byte) {
|
||||
t := mulTbl[c]
|
||||
for i := 0; i < len(a); i++ {
|
||||
b[i] = t[a[i]]
|
||||
}
|
||||
}
|
||||
|
||||
func mulVectAdd(c byte, a, b []byte) {
|
||||
t := mulTbl[c]
|
||||
for i := 0; i < len(a); i++ {
|
||||
b[i] ^= t[a[i]]
|
||||
}
|
||||
}
|
||||
|
||||
func (e *encBase) Reconstruct(vects [][]byte) (err error) {
|
||||
return e.reconstruct(vects, false)
|
||||
}
|
||||
|
||||
func (e *encBase) ReconstructData(vects [][]byte) (err error) {
|
||||
return e.reconstruct(vects, true)
|
||||
}
|
||||
|
||||
func (e *encBase) ReconstWithPos(vects [][]byte, has, dLost, pLost []int) error {
|
||||
return e.reconstWithPos(vects, has, dLost, pLost, false)
|
||||
}
|
||||
|
||||
func (e *encBase) ReconstDataWithPos(vects [][]byte, has, dLost []int) error {
|
||||
return e.reconstWithPos(vects, has, dLost, nil, true)
|
||||
}
|
||||
|
||||
func (e *encBase) reconst(vects [][]byte, has, dLost, pLost []int, dataOnly bool) (err error) {
|
||||
d := e.data
|
||||
em := e.encode
|
||||
dCnt := len(dLost)
|
||||
size := len(vects[has[0]])
|
||||
if dCnt != 0 {
|
||||
vtmp := make([][]byte, d+dCnt)
|
||||
for i, p := range has {
|
||||
vtmp[i] = vects[p]
|
||||
}
|
||||
for i, p := range dLost {
|
||||
if len(vects[p]) == 0 {
|
||||
vects[p] = make([]byte, size)
|
||||
}
|
||||
vtmp[i+d] = vects[p]
|
||||
}
|
||||
matrixbuf := make([]byte, 4*d*d+dCnt*d)
|
||||
m := matrixbuf[:d*d]
|
||||
for i, l := range has {
|
||||
copy(m[i*d:i*d+d], em[l*d:l*d+d])
|
||||
}
|
||||
raw := matrixbuf[d*d : 3*d*d]
|
||||
im := matrixbuf[3*d*d : 4*d*d]
|
||||
err2 := matrix(m).invert(raw, d, im)
|
||||
if err2 != nil {
|
||||
return err2
|
||||
}
|
||||
g := matrixbuf[4*d*d:]
|
||||
for i, l := range dLost {
|
||||
copy(g[i*d:i*d+d], im[l*d:l*d+d])
|
||||
}
|
||||
etmp := &encBase{data: d, parity: dCnt, gen: g}
|
||||
err2 = etmp.Encode(vtmp[:d+dCnt])
|
||||
if err2 != nil {
|
||||
return err2
|
||||
}
|
||||
}
|
||||
if dataOnly {
|
||||
return
|
||||
}
|
||||
pCnt := len(pLost)
|
||||
if pCnt != 0 {
|
||||
vtmp := make([][]byte, d+pCnt)
|
||||
g := make([]byte, pCnt*d)
|
||||
for i, l := range pLost {
|
||||
copy(g[i*d:i*d+d], em[l*d:l*d+d])
|
||||
}
|
||||
for i := 0; i < d; i++ {
|
||||
vtmp[i] = vects[i]
|
||||
}
|
||||
for i, p := range pLost {
|
||||
if len(vects[p]) == 0 {
|
||||
vects[p] = make([]byte, size)
|
||||
}
|
||||
vtmp[i+d] = vects[p]
|
||||
}
|
||||
etmp := &encBase{data: d, parity: pCnt, gen: g}
|
||||
err2 := etmp.Encode(vtmp[:d+pCnt])
|
||||
if err2 != nil {
|
||||
return err2
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
func (e *encBase) reconstWithPos(vects [][]byte, has, dLost, pLost []int, dataOnly bool) (err error) {
|
||||
d := e.data
|
||||
p := e.parity
|
||||
// TODO check more, maybe element in has show in lost & deal with len(has) > d
|
||||
if len(has) != d {
|
||||
return errors.New("rs.Reconst: not enough vects")
|
||||
}
|
||||
dCnt := len(dLost)
|
||||
if dCnt > p {
|
||||
return errors.New("rs.Reconst: not enough vects")
|
||||
}
|
||||
pCnt := len(pLost)
|
||||
if pCnt > p {
|
||||
return errors.New("rs.Reconst: not enough vects")
|
||||
}
|
||||
return e.reconst(vects, has, dLost, pLost, dataOnly)
|
||||
}
|
||||
|
||||
func (e *encBase) reconstruct(vects [][]byte, dataOnly bool) (err error) {
|
||||
d := e.data
|
||||
p := e.parity
|
||||
t := d + p
|
||||
listBuf := make([]int, t+p)
|
||||
has := listBuf[:d]
|
||||
dLost := listBuf[d:t]
|
||||
pLost := listBuf[t : t+p]
|
||||
hasCnt, dCnt, pCnt := 0, 0, 0
|
||||
for i := 0; i < t; i++ {
|
||||
if vects[i] != nil {
|
||||
if hasCnt < d {
|
||||
has[hasCnt] = i
|
||||
hasCnt++
|
||||
}
|
||||
} else {
|
||||
if i < d {
|
||||
if dCnt < p {
|
||||
dLost[dCnt] = i
|
||||
dCnt++
|
||||
} else {
|
||||
return errors.New("rs.Reconst: not enough vects")
|
||||
}
|
||||
} else {
|
||||
if pCnt < p {
|
||||
pLost[pCnt] = i
|
||||
pCnt++
|
||||
} else {
|
||||
return errors.New("rs.Reconst: not enough vects")
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if hasCnt != d {
|
||||
return errors.New("rs.Reconst: not enough vects")
|
||||
}
|
||||
dLost = dLost[:dCnt]
|
||||
pLost = pLost[:pCnt]
|
||||
return e.reconst(vects, has, dLost, pLost, dataOnly)
|
||||
}
|
868
vendor/github.com/templexxx/reedsolomon/rs_amd64.go
generated
vendored
Normal file
868
vendor/github.com/templexxx/reedsolomon/rs_amd64.go
generated
vendored
Normal file
@ -0,0 +1,868 @@
|
||||
package reedsolomon
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"sync"
|
||||
|
||||
"github.com/templexxx/cpufeat"
|
||||
)
|
||||
|
||||
// SIMD instruction extensions that select the encoder implementation.
const (
	none  = iota // no SIMD extension selected
	avx2         // AVX2
	ssse3        // SSSE3
)

// extension is the instruction extension in use; it starts as none and is
// set by getEXT.
var extension = none
|
||||
|
||||
func init() {
|
||||
getEXT()
|
||||
}
|
||||
|
||||
func getEXT() {
|
||||
if cpufeat.X86.HasAVX2 {
|
||||
extension = avx2
|
||||
return
|
||||
} else if cpufeat.X86.HasSSSE3 {
|
||||
extension = ssse3
|
||||
return
|
||||
} else {
|
||||
extension = none
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
//go:noescape
|
||||
func copy32B(dst, src []byte) // Need SSE2(introduced in 2001)
|
||||
|
||||
func initTbl(g matrix, rows, cols int, tbl []byte) {
|
||||
off := 0
|
||||
for i := 0; i < cols; i++ {
|
||||
for j := 0; j < rows; j++ {
|
||||
c := g[j*cols+i]
|
||||
t := lowhighTbl[c][:]
|
||||
copy32B(tbl[off:off+32], t)
|
||||
off += 32
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// okCache reports whether the inverse-matrix cache should be enabled for the
// given configuration.
// With these limits there are at most 3060 inverse matrices
// (data=14, parity=4, calculated by mathtool/cntinverse).
// In practice data is usually below 12 and parity below 5.
func okCache(data, parity int) bool {
	// The limits can be changed, but data+parity must not exceed 32
	// (see makeGen, which builds a uint32 cache key from the positions).
	return data < 15 && parity < 5
}
|
||||
|
||||
type (
|
||||
encSSSE3 encSIMD
|
||||
encAVX2 encSIMD
|
||||
encSIMD struct {
|
||||
data int
|
||||
parity int
|
||||
encode matrix
|
||||
gen matrix
|
||||
tbl []byte
|
||||
// inverse matrix cache is design for small vect size ( < 4KB )
|
||||
// it will save time for calculating inverse matrix
|
||||
// but it's not so important for big vect size
|
||||
enableCache bool
|
||||
inverseCache iCache
|
||||
}
|
||||
iCache struct {
|
||||
sync.RWMutex
|
||||
data map[uint32][]byte
|
||||
}
|
||||
)
|
||||
|
||||
func newRS(d, p int, em matrix) (enc Encoder) {
|
||||
g := em[d*d:]
|
||||
if extension == none {
|
||||
return &encBase{data: d, parity: p, encode: em, gen: g}
|
||||
}
|
||||
t := make([]byte, d*p*32)
|
||||
initTbl(g, p, d, t)
|
||||
ok := okCache(d, p)
|
||||
if extension == avx2 {
|
||||
e := &encAVX2{data: d, parity: p, encode: em, gen: g, tbl: t, enableCache: ok,
|
||||
inverseCache: iCache{data: make(map[uint32][]byte)}}
|
||||
return e
|
||||
}
|
||||
e := &encSSSE3{data: d, parity: p, encode: em, gen: g, tbl: t, enableCache: ok,
|
||||
inverseCache: iCache{data: make(map[uint32][]byte)}}
|
||||
return e
|
||||
}
|
||||
|
||||
// unit is the size of a sub-vector, in bytes.
const unit int = 16 * 1024

// getDo returns the step size used to walk a vect of n bytes: unit when n
// is at least unit or when n rounds down to zero, otherwise n rounded down
// to a multiple of 16.
func getDo(n int) int {
	if n >= unit {
		return unit
	}
	if aligned := n &^ 15; aligned != 0 {
		return aligned
	}
	return unit
}
|
||||
|
||||
func (e *encAVX2) Encode(vects [][]byte) (err error) {
|
||||
d := e.data
|
||||
p := e.parity
|
||||
size, err := checkEnc(d, p, vects)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
dv := vects[:d]
|
||||
pv := vects[d:]
|
||||
start, end := 0, 0
|
||||
do := getDo(size)
|
||||
for start < size {
|
||||
end = start + do
|
||||
if end <= size {
|
||||
e.matrixMul(start, end, dv, pv)
|
||||
start = end
|
||||
} else {
|
||||
e.matrixMulRemain(start, size, dv, pv)
|
||||
start = size
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
//go:noescape
|
||||
func mulVectAVX2(tbl, d, p []byte)
|
||||
|
||||
//go:noescape
|
||||
func mulVectAddAVX2(tbl, d, p []byte)
|
||||
|
||||
func (e *encAVX2) matrixMul(start, end int, dv, pv [][]byte) {
|
||||
d := e.data
|
||||
p := e.parity
|
||||
tbl := e.tbl
|
||||
off := 0
|
||||
for i := 0; i < d; i++ {
|
||||
for j := 0; j < p; j++ {
|
||||
t := tbl[off : off+32]
|
||||
if i != 0 {
|
||||
mulVectAddAVX2(t, dv[i][start:end], pv[j][start:end])
|
||||
} else {
|
||||
mulVectAVX2(t, dv[0][start:end], pv[j][start:end])
|
||||
}
|
||||
off += 32
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (e *encAVX2) matrixMulRemain(start, end int, dv, pv [][]byte) {
|
||||
undone := end - start
|
||||
do := (undone >> 4) << 4
|
||||
d := e.data
|
||||
p := e.parity
|
||||
tbl := e.tbl
|
||||
if do >= 16 {
|
||||
end2 := start + do
|
||||
off := 0
|
||||
for i := 0; i < d; i++ {
|
||||
for j := 0; j < p; j++ {
|
||||
t := tbl[off : off+32]
|
||||
if i != 0 {
|
||||
mulVectAddAVX2(t, dv[i][start:end2], pv[j][start:end2])
|
||||
} else {
|
||||
mulVectAVX2(t, dv[0][start:end2], pv[j][start:end2])
|
||||
}
|
||||
off += 32
|
||||
}
|
||||
}
|
||||
start = end
|
||||
}
|
||||
if undone > do {
|
||||
// may recalculate some data, but still improve a lot
|
||||
start2 := end - 16
|
||||
if start2 >= 0 {
|
||||
off := 0
|
||||
for i := 0; i < d; i++ {
|
||||
for j := 0; j < p; j++ {
|
||||
t := tbl[off : off+32]
|
||||
if i != 0 {
|
||||
mulVectAddAVX2(t, dv[i][start2:end], pv[j][start2:end])
|
||||
} else {
|
||||
mulVectAVX2(t, dv[0][start2:end], pv[j][start2:end])
|
||||
}
|
||||
off += 32
|
||||
}
|
||||
}
|
||||
} else {
|
||||
g := e.gen
|
||||
for i := 0; i < d; i++ {
|
||||
for j := 0; j < p; j++ {
|
||||
if i != 0 {
|
||||
mulVectAdd(g[j*d+i], dv[i][start:], pv[j][start:])
|
||||
} else {
|
||||
mulVect(g[j*d], dv[0][start:], pv[j][start:])
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// use generator-matrix but not tbls for encoding
|
||||
// it's design for reconstructing
|
||||
// for small vects, it cost to much time on initTbl, so drop it
|
||||
// and for big vects, the tbls can't impact much, because the cache will be filled with vects' data
|
||||
func (e *encAVX2) encodeGen(vects [][]byte) (err error) {
|
||||
d := e.data
|
||||
p := e.parity
|
||||
size, err := checkEnc(d, p, vects)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
dv := vects[:d]
|
||||
pv := vects[d:]
|
||||
start, end := 0, 0
|
||||
do := getDo(size)
|
||||
for start < size {
|
||||
end = start + do
|
||||
if end <= size {
|
||||
e.matrixMulGen(start, end, dv, pv)
|
||||
start = end
|
||||
} else {
|
||||
e.matrixMulRemainGen(start, size, dv, pv)
|
||||
start = size
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
func (e *encAVX2) matrixMulGen(start, end int, dv, pv [][]byte) {
|
||||
d := e.data
|
||||
p := e.parity
|
||||
g := e.gen
|
||||
for i := 0; i < d; i++ {
|
||||
for j := 0; j < p; j++ {
|
||||
t := lowhighTbl[g[j*d+i]][:]
|
||||
if i != 0 {
|
||||
mulVectAddAVX2(t, dv[i][start:end], pv[j][start:end])
|
||||
} else {
|
||||
mulVectAVX2(t, dv[0][start:end], pv[j][start:end])
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (e *encAVX2) matrixMulRemainGen(start, end int, dv, pv [][]byte) {
|
||||
undone := end - start
|
||||
do := (undone >> 4) << 4
|
||||
d := e.data
|
||||
p := e.parity
|
||||
g := e.gen
|
||||
if do >= 16 {
|
||||
end2 := start + do
|
||||
for i := 0; i < d; i++ {
|
||||
for j := 0; j < p; j++ {
|
||||
t := lowhighTbl[g[j*d+i]][:]
|
||||
if i != 0 {
|
||||
mulVectAddAVX2(t, dv[i][start:end2], pv[j][start:end2])
|
||||
} else {
|
||||
mulVectAVX2(t, dv[0][start:end2], pv[j][start:end2])
|
||||
}
|
||||
}
|
||||
}
|
||||
start = end
|
||||
}
|
||||
if undone > do {
|
||||
start2 := end - 16
|
||||
if start2 >= 0 {
|
||||
for i := 0; i < d; i++ {
|
||||
for j := 0; j < p; j++ {
|
||||
t := lowhighTbl[g[j*d+i]][:]
|
||||
if i != 0 {
|
||||
mulVectAddAVX2(t, dv[i][start2:end], pv[j][start2:end])
|
||||
} else {
|
||||
mulVectAVX2(t, dv[0][start2:end], pv[j][start2:end])
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
for i := 0; i < d; i++ {
|
||||
for j := 0; j < p; j++ {
|
||||
if i != 0 {
|
||||
mulVectAdd(g[j*d+i], dv[i][start:], pv[j][start:])
|
||||
} else {
|
||||
mulVect(g[j*d], dv[0][start:], pv[j][start:])
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (e *encAVX2) Reconstruct(vects [][]byte) (err error) {
|
||||
return e.reconstruct(vects, false)
|
||||
}
|
||||
|
||||
func (e *encAVX2) ReconstructData(vects [][]byte) (err error) {
|
||||
return e.reconstruct(vects, true)
|
||||
}
|
||||
|
||||
func (e *encAVX2) ReconstWithPos(vects [][]byte, has, dLost, pLost []int) error {
|
||||
return e.reconstWithPos(vects, has, dLost, pLost, false)
|
||||
}
|
||||
|
||||
func (e *encAVX2) ReconstDataWithPos(vects [][]byte, has, dLost []int) error {
|
||||
return e.reconstWithPos(vects, has, dLost, nil, true)
|
||||
}
|
||||
|
||||
func (e *encAVX2) makeGen(has, dLost []int) (gen []byte, err error) {
|
||||
d := e.data
|
||||
em := e.encode
|
||||
cnt := len(dLost)
|
||||
if !e.enableCache {
|
||||
matrixbuf := make([]byte, 4*d*d+cnt*d)
|
||||
m := matrixbuf[:d*d]
|
||||
for i, l := range has {
|
||||
copy(m[i*d:i*d+d], em[l*d:l*d+d])
|
||||
}
|
||||
raw := matrixbuf[d*d : 3*d*d]
|
||||
im := matrixbuf[3*d*d : 4*d*d]
|
||||
err2 := matrix(m).invert(raw, d, im)
|
||||
if err2 != nil {
|
||||
return nil, err2
|
||||
}
|
||||
g := matrixbuf[4*d*d:]
|
||||
for i, l := range dLost {
|
||||
copy(g[i*d:i*d+d], im[l*d:l*d+d])
|
||||
}
|
||||
return g, nil
|
||||
}
|
||||
var ikey uint32
|
||||
for _, p := range has {
|
||||
ikey += 1 << uint8(p)
|
||||
}
|
||||
e.inverseCache.RLock()
|
||||
v, ok := e.inverseCache.data[ikey]
|
||||
if ok {
|
||||
im := v
|
||||
g := make([]byte, cnt*d)
|
||||
for i, l := range dLost {
|
||||
copy(g[i*d:i*d+d], im[l*d:l*d+d])
|
||||
}
|
||||
e.inverseCache.RUnlock()
|
||||
return g, nil
|
||||
}
|
||||
e.inverseCache.RUnlock()
|
||||
matrixbuf := make([]byte, 4*d*d+cnt*d)
|
||||
m := matrixbuf[:d*d]
|
||||
for i, l := range has {
|
||||
copy(m[i*d:i*d+d], em[l*d:l*d+d])
|
||||
}
|
||||
raw := matrixbuf[d*d : 3*d*d]
|
||||
im := matrixbuf[3*d*d : 4*d*d]
|
||||
err2 := matrix(m).invert(raw, d, im)
|
||||
if err2 != nil {
|
||||
return nil, err2
|
||||
}
|
||||
e.inverseCache.Lock()
|
||||
e.inverseCache.data[ikey] = im
|
||||
e.inverseCache.Unlock()
|
||||
g := matrixbuf[4*d*d:]
|
||||
for i, l := range dLost {
|
||||
copy(g[i*d:i*d+d], im[l*d:l*d+d])
|
||||
}
|
||||
return g, nil
|
||||
}
|
||||
|
||||
func (e *encAVX2) reconst(vects [][]byte, has, dLost, pLost []int, dataOnly bool) (err error) {
|
||||
d := e.data
|
||||
em := e.encode
|
||||
dCnt := len(dLost)
|
||||
size := len(vects[has[0]])
|
||||
if dCnt != 0 {
|
||||
vtmp := make([][]byte, d+dCnt)
|
||||
for i, p := range has {
|
||||
vtmp[i] = vects[p]
|
||||
}
|
||||
for i, p := range dLost {
|
||||
if len(vects[p]) == 0 {
|
||||
vects[p] = make([]byte, size)
|
||||
}
|
||||
vtmp[i+d] = vects[p]
|
||||
}
|
||||
g, err2 := e.makeGen(has, dLost)
|
||||
if err2 != nil {
|
||||
return
|
||||
}
|
||||
etmp := &encAVX2{data: d, parity: dCnt, gen: g}
|
||||
err2 = etmp.encodeGen(vtmp)
|
||||
if err2 != nil {
|
||||
return err2
|
||||
}
|
||||
}
|
||||
if dataOnly {
|
||||
return
|
||||
}
|
||||
pCnt := len(pLost)
|
||||
if pCnt != 0 {
|
||||
g := make([]byte, pCnt*d)
|
||||
for i, l := range pLost {
|
||||
copy(g[i*d:i*d+d], em[l*d:l*d+d])
|
||||
}
|
||||
vtmp := make([][]byte, d+pCnt)
|
||||
for i := 0; i < d; i++ {
|
||||
vtmp[i] = vects[i]
|
||||
}
|
||||
for i, p := range pLost {
|
||||
if len(vects[p]) == 0 {
|
||||
vects[p] = make([]byte, size)
|
||||
}
|
||||
vtmp[i+d] = vects[p]
|
||||
}
|
||||
etmp := &encAVX2{data: d, parity: pCnt, gen: g}
|
||||
err2 := etmp.encodeGen(vtmp)
|
||||
if err2 != nil {
|
||||
return err2
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
func (e *encAVX2) reconstWithPos(vects [][]byte, has, dLost, pLost []int, dataOnly bool) (err error) {
|
||||
d := e.data
|
||||
p := e.parity
|
||||
if len(has) != d {
|
||||
return errors.New("rs.Reconst: not enough vects")
|
||||
}
|
||||
dCnt := len(dLost)
|
||||
if dCnt > p {
|
||||
return errors.New("rs.Reconst: not enough vects")
|
||||
}
|
||||
pCnt := len(pLost)
|
||||
if pCnt > p {
|
||||
return errors.New("rs.Reconst: not enough vects")
|
||||
}
|
||||
return e.reconst(vects, has, dLost, pLost, dataOnly)
|
||||
}
|
||||
|
||||
func (e *encAVX2) reconstruct(vects [][]byte, dataOnly bool) (err error) {
|
||||
d := e.data
|
||||
p := e.parity
|
||||
t := d + p
|
||||
listBuf := make([]int, t+p)
|
||||
has := listBuf[:d]
|
||||
dLost := listBuf[d:t]
|
||||
pLost := listBuf[t : t+p]
|
||||
hasCnt, dCnt, pCnt := 0, 0, 0
|
||||
for i := 0; i < t; i++ {
|
||||
if vects[i] != nil {
|
||||
if hasCnt < d {
|
||||
has[hasCnt] = i
|
||||
hasCnt++
|
||||
}
|
||||
} else {
|
||||
if i < d {
|
||||
if dCnt < p {
|
||||
dLost[dCnt] = i
|
||||
dCnt++
|
||||
} else {
|
||||
return errors.New("rs.Reconst: not enough vects")
|
||||
}
|
||||
} else {
|
||||
if pCnt < p {
|
||||
pLost[pCnt] = i
|
||||
pCnt++
|
||||
} else {
|
||||
return errors.New("rs.Reconst: not enough vects")
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if hasCnt != d {
|
||||
return errors.New("rs.Reconst: not enough vects")
|
||||
}
|
||||
dLost = dLost[:dCnt]
|
||||
pLost = pLost[:pCnt]
|
||||
return e.reconst(vects, has, dLost, pLost, dataOnly)
|
||||
}
|
||||
|
||||
func (e *encSSSE3) Encode(vects [][]byte) (err error) {
|
||||
d := e.data
|
||||
p := e.parity
|
||||
size, err := checkEnc(d, p, vects)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
dv := vects[:d]
|
||||
pv := vects[d:]
|
||||
start, end := 0, 0
|
||||
do := getDo(size)
|
||||
for start < size {
|
||||
end = start + do
|
||||
if end <= size {
|
||||
e.matrixMul(start, end, dv, pv)
|
||||
start = end
|
||||
} else {
|
||||
e.matrixMulRemain(start, size, dv, pv)
|
||||
start = size
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
//go:noescape
|
||||
func mulVectSSSE3(tbl, d, p []byte)
|
||||
|
||||
//go:noescape
|
||||
func mulVectAddSSSE3(tbl, d, p []byte)
|
||||
|
||||
func (e *encSSSE3) matrixMul(start, end int, dv, pv [][]byte) {
|
||||
d := e.data
|
||||
p := e.parity
|
||||
tbl := e.tbl
|
||||
off := 0
|
||||
for i := 0; i < d; i++ {
|
||||
for j := 0; j < p; j++ {
|
||||
t := tbl[off : off+32]
|
||||
if i != 0 {
|
||||
mulVectAddSSSE3(t, dv[i][start:end], pv[j][start:end])
|
||||
} else {
|
||||
mulVectSSSE3(t, dv[0][start:end], pv[j][start:end])
|
||||
}
|
||||
off += 32
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (e *encSSSE3) matrixMulRemain(start, end int, dv, pv [][]byte) {
|
||||
undone := end - start
|
||||
do := (undone >> 4) << 4
|
||||
d := e.data
|
||||
p := e.parity
|
||||
tbl := e.tbl
|
||||
if do >= 16 {
|
||||
end2 := start + do
|
||||
off := 0
|
||||
for i := 0; i < d; i++ {
|
||||
for j := 0; j < p; j++ {
|
||||
t := tbl[off : off+32]
|
||||
if i != 0 {
|
||||
mulVectAddSSSE3(t, dv[i][start:end2], pv[j][start:end2])
|
||||
} else {
|
||||
mulVectSSSE3(t, dv[0][start:end2], pv[j][start:end2])
|
||||
}
|
||||
off += 32
|
||||
}
|
||||
}
|
||||
start = end
|
||||
}
|
||||
if undone > do {
|
||||
start2 := end - 16
|
||||
if start2 >= 0 {
|
||||
off := 0
|
||||
for i := 0; i < d; i++ {
|
||||
for j := 0; j < p; j++ {
|
||||
t := tbl[off : off+32]
|
||||
if i != 0 {
|
||||
mulVectAddSSSE3(t, dv[i][start2:end], pv[j][start2:end])
|
||||
} else {
|
||||
mulVectSSSE3(t, dv[0][start2:end], pv[j][start2:end])
|
||||
}
|
||||
off += 32
|
||||
}
|
||||
}
|
||||
} else {
|
||||
g := e.gen
|
||||
for i := 0; i < d; i++ {
|
||||
for j := 0; j < p; j++ {
|
||||
if i != 0 {
|
||||
mulVectAdd(g[j*d+i], dv[i][start:], pv[j][start:])
|
||||
} else {
|
||||
mulVect(g[j*d], dv[0][start:], pv[j][start:])
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// use generator-matrix but not tbls for encoding
|
||||
// it's design for reconstructing
|
||||
// for small vects, it cost to much time on initTbl, so drop it
|
||||
// and for big vects, the tbls can't impact much, because the cache will be filled with vects' data
|
||||
func (e *encSSSE3) encodeGen(vects [][]byte) (err error) {
|
||||
d := e.data
|
||||
p := e.parity
|
||||
size, err := checkEnc(d, p, vects)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
dv := vects[:d]
|
||||
pv := vects[d:]
|
||||
start, end := 0, 0
|
||||
do := getDo(size)
|
||||
for start < size {
|
||||
end = start + do
|
||||
if end <= size {
|
||||
e.matrixMulGen(start, end, dv, pv)
|
||||
start = end
|
||||
} else {
|
||||
e.matrixMulRemainGen(start, size, dv, pv)
|
||||
start = size
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
func (e *encSSSE3) matrixMulGen(start, end int, dv, pv [][]byte) {
|
||||
d := e.data
|
||||
p := e.parity
|
||||
g := e.gen
|
||||
for i := 0; i < d; i++ {
|
||||
for j := 0; j < p; j++ {
|
||||
t := lowhighTbl[g[j*d+i]][:]
|
||||
if i != 0 {
|
||||
mulVectAddSSSE3(t, dv[i][start:end], pv[j][start:end])
|
||||
} else {
|
||||
mulVectSSSE3(t, dv[0][start:end], pv[j][start:end])
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (e *encSSSE3) matrixMulRemainGen(start, end int, dv, pv [][]byte) {
|
||||
undone := end - start
|
||||
do := (undone >> 4) << 4
|
||||
d := e.data
|
||||
p := e.parity
|
||||
g := e.gen
|
||||
if do >= 16 {
|
||||
end2 := start + do
|
||||
for i := 0; i < d; i++ {
|
||||
for j := 0; j < p; j++ {
|
||||
t := lowhighTbl[g[j*d+i]][:]
|
||||
if i != 0 {
|
||||
mulVectAddSSSE3(t, dv[i][start:end2], pv[j][start:end2])
|
||||
} else {
|
||||
mulVectSSSE3(t, dv[0][start:end2], pv[j][start:end2])
|
||||
}
|
||||
}
|
||||
}
|
||||
start = end
|
||||
}
|
||||
if undone > do {
|
||||
start2 := end - 16
|
||||
if start2 >= 0 {
|
||||
for i := 0; i < d; i++ {
|
||||
for j := 0; j < p; j++ {
|
||||
t := lowhighTbl[g[j*d+i]][:]
|
||||
if i != 0 {
|
||||
mulVectAddSSSE3(t, dv[i][start2:end], pv[j][start2:end])
|
||||
} else {
|
||||
mulVectSSSE3(t, dv[0][start2:end], pv[j][start2:end])
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
for i := 0; i < d; i++ {
|
||||
for j := 0; j < p; j++ {
|
||||
if i != 0 {
|
||||
mulVectAdd(g[j*d+i], dv[i][start:], pv[j][start:])
|
||||
} else {
|
||||
mulVect(g[j*d], dv[0][start:], pv[j][start:])
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (e *encSSSE3) Reconstruct(vects [][]byte) (err error) {
|
||||
return e.reconstruct(vects, false)
|
||||
}
|
||||
|
||||
func (e *encSSSE3) ReconstructData(vects [][]byte) (err error) {
|
||||
return e.reconstruct(vects, true)
|
||||
}
|
||||
|
||||
func (e *encSSSE3) ReconstWithPos(vects [][]byte, has, dLost, pLost []int) error {
|
||||
return e.reconstWithPos(vects, has, dLost, pLost, false)
|
||||
}
|
||||
|
||||
func (e *encSSSE3) ReconstDataWithPos(vects [][]byte, has, dLost []int) error {
|
||||
return e.reconstWithPos(vects, has, dLost, nil, true)
|
||||
}
|
||||
|
||||
func (e *encSSSE3) makeGen(has, dLost []int) (gen []byte, err error) {
|
||||
d := e.data
|
||||
em := e.encode
|
||||
cnt := len(dLost)
|
||||
if !e.enableCache {
|
||||
matrixbuf := make([]byte, 4*d*d+cnt*d)
|
||||
m := matrixbuf[:d*d]
|
||||
for i, l := range has {
|
||||
copy(m[i*d:i*d+d], em[l*d:l*d+d])
|
||||
}
|
||||
raw := matrixbuf[d*d : 3*d*d]
|
||||
im := matrixbuf[3*d*d : 4*d*d]
|
||||
err2 := matrix(m).invert(raw, d, im)
|
||||
if err2 != nil {
|
||||
return nil, err2
|
||||
}
|
||||
g := matrixbuf[4*d*d:]
|
||||
for i, l := range dLost {
|
||||
copy(g[i*d:i*d+d], im[l*d:l*d+d])
|
||||
}
|
||||
return g, nil
|
||||
}
|
||||
var ikey uint32
|
||||
for _, p := range has {
|
||||
ikey += 1 << uint8(p)
|
||||
}
|
||||
e.inverseCache.RLock()
|
||||
v, ok := e.inverseCache.data[ikey]
|
||||
if ok {
|
||||
im := v
|
||||
g := make([]byte, cnt*d)
|
||||
for i, l := range dLost {
|
||||
copy(g[i*d:i*d+d], im[l*d:l*d+d])
|
||||
}
|
||||
e.inverseCache.RUnlock()
|
||||
return g, nil
|
||||
}
|
||||
e.inverseCache.RUnlock()
|
||||
matrixbuf := make([]byte, 4*d*d+cnt*d)
|
||||
m := matrixbuf[:d*d]
|
||||
for i, l := range has {
|
||||
copy(m[i*d:i*d+d], em[l*d:l*d+d])
|
||||
}
|
||||
raw := matrixbuf[d*d : 3*d*d]
|
||||
im := matrixbuf[3*d*d : 4*d*d]
|
||||
err2 := matrix(m).invert(raw, d, im)
|
||||
if err2 != nil {
|
||||
return nil, err2
|
||||
}
|
||||
e.inverseCache.Lock()
|
||||
e.inverseCache.data[ikey] = im
|
||||
e.inverseCache.Unlock()
|
||||
g := matrixbuf[4*d*d:]
|
||||
for i, l := range dLost {
|
||||
copy(g[i*d:i*d+d], im[l*d:l*d+d])
|
||||
}
|
||||
return g, nil
|
||||
}
|
||||
|
||||
func (e *encSSSE3) reconst(vects [][]byte, has, dLost, pLost []int, dataOnly bool) (err error) {
|
||||
d := e.data
|
||||
em := e.encode
|
||||
dCnt := len(dLost)
|
||||
size := len(vects[has[0]])
|
||||
if dCnt != 0 {
|
||||
vtmp := make([][]byte, d+dCnt)
|
||||
for i, p := range has {
|
||||
vtmp[i] = vects[p]
|
||||
}
|
||||
for i, p := range dLost {
|
||||
if len(vects[p]) == 0 {
|
||||
vects[p] = make([]byte, size)
|
||||
}
|
||||
vtmp[i+d] = vects[p]
|
||||
}
|
||||
g, err2 := e.makeGen(has, dLost)
|
||||
if err2 != nil {
|
||||
return
|
||||
}
|
||||
etmp := &encSSSE3{data: d, parity: dCnt, gen: g}
|
||||
err2 = etmp.encodeGen(vtmp)
|
||||
if err2 != nil {
|
||||
return err2
|
||||
}
|
||||
}
|
||||
if dataOnly {
|
||||
return
|
||||
}
|
||||
pCnt := len(pLost)
|
||||
if pCnt != 0 {
|
||||
g := make([]byte, pCnt*d)
|
||||
for i, l := range pLost {
|
||||
copy(g[i*d:i*d+d], em[l*d:l*d+d])
|
||||
}
|
||||
vtmp := make([][]byte, d+pCnt)
|
||||
for i := 0; i < d; i++ {
|
||||
vtmp[i] = vects[i]
|
||||
}
|
||||
for i, p := range pLost {
|
||||
if len(vects[p]) == 0 {
|
||||
vects[p] = make([]byte, size)
|
||||
}
|
||||
vtmp[i+d] = vects[p]
|
||||
}
|
||||
etmp := &encSSSE3{data: d, parity: pCnt, gen: g}
|
||||
err2 := etmp.encodeGen(vtmp)
|
||||
if err2 != nil {
|
||||
return err2
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
func (e *encSSSE3) reconstWithPos(vects [][]byte, has, dLost, pLost []int, dataOnly bool) (err error) {
|
||||
d := e.data
|
||||
p := e.parity
|
||||
if len(has) != d {
|
||||
return errors.New("rs.Reconst: not enough vects")
|
||||
}
|
||||
dCnt := len(dLost)
|
||||
if dCnt > p {
|
||||
return errors.New("rs.Reconst: not enough vects")
|
||||
}
|
||||
pCnt := len(pLost)
|
||||
if pCnt > p {
|
||||
return errors.New("rs.Reconst: not enough vects")
|
||||
}
|
||||
return e.reconst(vects, has, dLost, pLost, dataOnly)
|
||||
}
|
||||
|
||||
func (e *encSSSE3) reconstruct(vects [][]byte, dataOnly bool) (err error) {
|
||||
d := e.data
|
||||
p := e.parity
|
||||
t := d + p
|
||||
listBuf := make([]int, t+p)
|
||||
has := listBuf[:d]
|
||||
dLost := listBuf[d:t]
|
||||
pLost := listBuf[t : t+p]
|
||||
hasCnt, dCnt, pCnt := 0, 0, 0
|
||||
for i := 0; i < t; i++ {
|
||||
if vects[i] != nil {
|
||||
if hasCnt < d {
|
||||
has[hasCnt] = i
|
||||
hasCnt++
|
||||
}
|
||||
} else {
|
||||
if i < d {
|
||||
if dCnt < p {
|
||||
dLost[dCnt] = i
|
||||
dCnt++
|
||||
} else {
|
||||
return errors.New("rs.Reconst: not enough vects")
|
||||
}
|
||||
} else {
|
||||
if pCnt < p {
|
||||
pLost[pCnt] = i
|
||||
pCnt++
|
||||
} else {
|
||||
return errors.New("rs.Reconst: not enough vects")
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if hasCnt != d {
|
||||
return errors.New("rs.Reconst: not enough vects")
|
||||
}
|
||||
dLost = dLost[:dCnt]
|
||||
pLost = pLost[:pCnt]
|
||||
return e.reconst(vects, has, dLost, pLost, dataOnly)
|
||||
}
|
401
vendor/github.com/templexxx/reedsolomon/rs_amd64.s
generated
vendored
Normal file
401
vendor/github.com/templexxx/reedsolomon/rs_amd64.s
generated
vendored
Normal file
@ -0,0 +1,401 @@
|
||||
// Reference: www.ssrc.ucsc.edu/Papers/plank-fast13.pdf
|
||||
|
||||
#include "textflag.h"
|
||||
|
||||
#define low_tbl Y0
|
||||
#define high_tbl Y1
|
||||
#define mask Y2
|
||||
#define in0 Y3
|
||||
#define in1 Y4
|
||||
#define in2 Y5
|
||||
#define in3 Y6
|
||||
#define in4 Y7
|
||||
#define in5 Y8
|
||||
#define in0_h Y10
|
||||
#define in1_h Y11
|
||||
#define in2_h Y12
|
||||
#define in3_h Y13
|
||||
#define in4_h Y14
|
||||
#define in5_h Y15
|
||||
|
||||
#define in BX
|
||||
#define out DI
|
||||
#define len R8
|
||||
#define pos R9
|
||||
|
||||
#define tmp0 R10
|
||||
|
||||
#define low_tblx X0
|
||||
#define high_tblx X1
|
||||
#define maskx X2
|
||||
#define in0x X3
|
||||
#define in0_hx X10
|
||||
#define tmp0x X9
|
||||
#define tmp1x X11
|
||||
#define tmp2x X12
|
||||
#define tmp3x X13
|
||||
|
||||
|
||||
// func mulVectAVX2(tbl, d, p []byte)
|
||||
TEXT ·mulVectAVX2(SB), NOSPLIT, $0
|
||||
MOVQ i+24(FP), in
|
||||
MOVQ o+48(FP), out
|
||||
MOVQ tbl+0(FP), tmp0
|
||||
VMOVDQU (tmp0), low_tblx
|
||||
VMOVDQU 16(tmp0), high_tblx
|
||||
MOVB $0x0f, DX
|
||||
LONG $0x2069e3c4; WORD $0x00d2 // VPINSRB $0x00, EDX, XMM2, XMM2
|
||||
VPBROADCASTB maskx, maskx
|
||||
MOVQ in_len+32(FP), len
|
||||
TESTQ $31, len
|
||||
JNZ one16b
|
||||
|
||||
ymm:
|
||||
VINSERTI128 $1, low_tblx, low_tbl, low_tbl
|
||||
VINSERTI128 $1, high_tblx, high_tbl, high_tbl
|
||||
VINSERTI128 $1, maskx, mask, mask
|
||||
TESTQ $255, len
|
||||
JNZ not_aligned
|
||||
|
||||
// 256bytes/loop
|
||||
aligned:
|
||||
MOVQ $0, pos
|
||||
|
||||
loop256b:
|
||||
VMOVDQU (in)(pos*1), in0
|
||||
VPSRLQ $4, in0, in0_h
|
||||
VPAND mask, in0_h, in0_h
|
||||
VPAND mask, in0, in0
|
||||
VPSHUFB in0_h, high_tbl, in0_h
|
||||
VPSHUFB in0, low_tbl, in0
|
||||
VPXOR in0, in0_h, in0
|
||||
VMOVDQU in0, (out)(pos*1)
|
||||
|
||||
VMOVDQU 32(in)(pos*1), in1
|
||||
VPSRLQ $4, in1, in1_h
|
||||
VPAND mask, in1_h, in1_h
|
||||
VPAND mask, in1, in1
|
||||
VPSHUFB in1_h, high_tbl, in1_h
|
||||
VPSHUFB in1, low_tbl, in1
|
||||
VPXOR in1, in1_h, in1
|
||||
VMOVDQU in1, 32(out)(pos*1)
|
||||
|
||||
VMOVDQU 64(in)(pos*1), in2
|
||||
VPSRLQ $4, in2, in2_h
|
||||
VPAND mask, in2_h, in2_h
|
||||
VPAND mask, in2, in2
|
||||
VPSHUFB in2_h, high_tbl, in2_h
|
||||
VPSHUFB in2, low_tbl, in2
|
||||
VPXOR in2, in2_h, in2
|
||||
VMOVDQU in2, 64(out)(pos*1)
|
||||
|
||||
VMOVDQU 96(in)(pos*1), in3
|
||||
VPSRLQ $4, in3, in3_h
|
||||
VPAND mask, in3_h, in3_h
|
||||
VPAND mask, in3, in3
|
||||
VPSHUFB in3_h, high_tbl, in3_h
|
||||
VPSHUFB in3, low_tbl, in3
|
||||
VPXOR in3, in3_h, in3
|
||||
VMOVDQU in3, 96(out)(pos*1)
|
||||
|
||||
VMOVDQU 128(in)(pos*1), in4
|
||||
VPSRLQ $4, in4, in4_h
|
||||
VPAND mask, in4_h, in4_h
|
||||
VPAND mask, in4, in4
|
||||
VPSHUFB in4_h, high_tbl, in4_h
|
||||
VPSHUFB in4, low_tbl, in4
|
||||
VPXOR in4, in4_h, in4
|
||||
VMOVDQU in4, 128(out)(pos*1)
|
||||
|
||||
VMOVDQU 160(in)(pos*1), in5
|
||||
VPSRLQ $4, in5, in5_h
|
||||
VPAND mask, in5_h, in5_h
|
||||
VPAND mask, in5, in5
|
||||
VPSHUFB in5_h, high_tbl, in5_h
|
||||
VPSHUFB in5, low_tbl, in5
|
||||
VPXOR in5, in5_h, in5
|
||||
VMOVDQU in5, 160(out)(pos*1)
|
||||
|
||||
VMOVDQU 192(in)(pos*1), in0
|
||||
VPSRLQ $4, in0, in0_h
|
||||
VPAND mask, in0_h, in0_h
|
||||
VPAND mask, in0, in0
|
||||
VPSHUFB in0_h, high_tbl, in0_h
|
||||
VPSHUFB in0, low_tbl, in0
|
||||
VPXOR in0, in0_h, in0
|
||||
VMOVDQU in0, 192(out)(pos*1)
|
||||
|
||||
VMOVDQU 224(in)(pos*1), in1
|
||||
VPSRLQ $4, in1, in1_h
|
||||
VPAND mask, in1_h, in1_h
|
||||
VPAND mask, in1, in1
|
||||
VPSHUFB in1_h, high_tbl, in1_h
|
||||
VPSHUFB in1, low_tbl, in1
|
||||
VPXOR in1, in1_h, in1
|
||||
VMOVDQU in1, 224(out)(pos*1)
|
||||
|
||||
ADDQ $256, pos
|
||||
CMPQ len, pos
|
||||
JNE loop256b
|
||||
VZEROUPPER
|
||||
RET
|
||||
|
||||
not_aligned:
|
||||
MOVQ len, tmp0
|
||||
ANDQ $255, tmp0
|
||||
|
||||
loop32b:
|
||||
VMOVDQU -32(in)(len*1), in0
|
||||
VPSRLQ $4, in0, in0_h
|
||||
VPAND mask, in0_h, in0_h
|
||||
VPAND mask, in0, in0
|
||||
VPSHUFB in0_h, high_tbl, in0_h
|
||||
VPSHUFB in0, low_tbl, in0
|
||||
VPXOR in0, in0_h, in0
|
||||
VMOVDQU in0, -32(out)(len*1)
|
||||
SUBQ $32, len
|
||||
SUBQ $32, tmp0
|
||||
JG loop32b
|
||||
CMPQ len, $256
|
||||
JGE aligned
|
||||
VZEROUPPER
|
||||
RET
|
||||
|
||||
one16b:
|
||||
VMOVDQU -16(in)(len*1), in0x
|
||||
VPSRLQ $4, in0x, in0_hx
|
||||
VPAND maskx, in0x, in0x
|
||||
VPAND maskx, in0_hx, in0_hx
|
||||
VPSHUFB in0_hx, high_tblx, in0_hx
|
||||
VPSHUFB in0x, low_tblx, in0x
|
||||
VPXOR in0x, in0_hx, in0x
|
||||
VMOVDQU in0x, -16(out)(len*1)
|
||||
SUBQ $16, len
|
||||
CMPQ len, $0
|
||||
JNE ymm
|
||||
RET
|
||||
|
||||
// func mulVectAddAVX2(tbl, d, p []byte)
|
||||
TEXT ·mulVectAddAVX2(SB), NOSPLIT, $0
|
||||
MOVQ i+24(FP), in
|
||||
MOVQ o+48(FP), out
|
||||
MOVQ tbl+0(FP), tmp0
|
||||
VMOVDQU (tmp0), low_tblx
|
||||
VMOVDQU 16(tmp0), high_tblx
|
||||
MOVB $0x0f, DX
|
||||
LONG $0x2069e3c4; WORD $0x00d2
|
||||
VPBROADCASTB maskx, maskx
|
||||
MOVQ in_len+32(FP), len
|
||||
TESTQ $31, len
|
||||
JNZ one16b
|
||||
|
||||
ymm:
|
||||
VINSERTI128 $1, low_tblx, low_tbl, low_tbl
|
||||
VINSERTI128 $1, high_tblx, high_tbl, high_tbl
|
||||
VINSERTI128 $1, maskx, mask, mask
|
||||
TESTQ $255, len
|
||||
JNZ not_aligned
|
||||
|
||||
aligned:
|
||||
MOVQ $0, pos
|
||||
|
||||
loop256b:
|
||||
VMOVDQU (in)(pos*1), in0
|
||||
VPSRLQ $4, in0, in0_h
|
||||
VPAND mask, in0_h, in0_h
|
||||
VPAND mask, in0, in0
|
||||
VPSHUFB in0_h, high_tbl, in0_h
|
||||
VPSHUFB in0, low_tbl, in0
|
||||
VPXOR in0, in0_h, in0
|
||||
VPXOR (out)(pos*1), in0, in0
|
||||
VMOVDQU in0, (out)(pos*1)
|
||||
|
||||
VMOVDQU 32(in)(pos*1), in1
|
||||
VPSRLQ $4, in1, in1_h
|
||||
VPAND mask, in1_h, in1_h
|
||||
VPAND mask, in1, in1
|
||||
VPSHUFB in1_h, high_tbl, in1_h
|
||||
VPSHUFB in1, low_tbl, in1
|
||||
VPXOR in1, in1_h, in1
|
||||
VPXOR 32(out)(pos*1), in1, in1
|
||||
VMOVDQU in1, 32(out)(pos*1)
|
||||
|
||||
VMOVDQU 64(in)(pos*1), in2
|
||||
VPSRLQ $4, in2, in2_h
|
||||
VPAND mask, in2_h, in2_h
|
||||
VPAND mask, in2, in2
|
||||
VPSHUFB in2_h, high_tbl, in2_h
|
||||
VPSHUFB in2, low_tbl, in2
|
||||
VPXOR in2, in2_h, in2
|
||||
VPXOR 64(out)(pos*1), in2, in2
|
||||
VMOVDQU in2, 64(out)(pos*1)
|
||||
|
||||
VMOVDQU 96(in)(pos*1), in3
|
||||
VPSRLQ $4, in3, in3_h
|
||||
VPAND mask, in3_h, in3_h
|
||||
VPAND mask, in3, in3
|
||||
VPSHUFB in3_h, high_tbl, in3_h
|
||||
VPSHUFB in3, low_tbl, in3
|
||||
VPXOR in3, in3_h, in3
|
||||
VPXOR 96(out)(pos*1), in3, in3
|
||||
VMOVDQU in3, 96(out)(pos*1)
|
||||
|
||||
VMOVDQU 128(in)(pos*1), in4
|
||||
VPSRLQ $4, in4, in4_h
|
||||
VPAND mask, in4_h, in4_h
|
||||
VPAND mask, in4, in4
|
||||
VPSHUFB in4_h, high_tbl, in4_h
|
||||
VPSHUFB in4, low_tbl, in4
|
||||
VPXOR in4, in4_h, in4
|
||||
VPXOR 128(out)(pos*1), in4, in4
|
||||
VMOVDQU in4, 128(out)(pos*1)
|
||||
|
||||
VMOVDQU 160(in)(pos*1), in5
|
||||
VPSRLQ $4, in5, in5_h
|
||||
VPAND mask, in5_h, in5_h
|
||||
VPAND mask, in5, in5
|
||||
VPSHUFB in5_h, high_tbl, in5_h
|
||||
VPSHUFB in5, low_tbl, in5
|
||||
VPXOR in5, in5_h, in5
|
||||
VPXOR 160(out)(pos*1), in5, in5
|
||||
VMOVDQU in5, 160(out)(pos*1)
|
||||
|
||||
VMOVDQU 192(in)(pos*1), in0
|
||||
VPSRLQ $4, in0, in0_h
|
||||
VPAND mask, in0_h, in0_h
|
||||
VPAND mask, in0, in0
|
||||
VPSHUFB in0_h, high_tbl, in0_h
|
||||
VPSHUFB in0, low_tbl, in0
|
||||
VPXOR in0, in0_h, in0
|
||||
VPXOR 192(out)(pos*1), in0, in0
|
||||
VMOVDQU in0, 192(out)(pos*1)
|
||||
|
||||
VMOVDQU 224(in)(pos*1), in1
|
||||
VPSRLQ $4, in1, in1_h
|
||||
VPAND mask, in1_h, in1_h
|
||||
VPAND mask, in1, in1
|
||||
VPSHUFB in1_h, high_tbl, in1_h
|
||||
VPSHUFB in1, low_tbl, in1
|
||||
VPXOR in1, in1_h, in1
|
||||
VPXOR 224(out)(pos*1), in1, in1
|
||||
VMOVDQU in1, 224(out)(pos*1)
|
||||
|
||||
ADDQ $256, pos
|
||||
CMPQ len, pos
|
||||
JNE loop256b
|
||||
VZEROUPPER
|
||||
RET
|
||||
|
||||
not_aligned:
|
||||
MOVQ len, tmp0
|
||||
ANDQ $255, tmp0
|
||||
|
||||
loop32b:
|
||||
VMOVDQU -32(in)(len*1), in0
|
||||
VPSRLQ $4, in0, in0_h
|
||||
VPAND mask, in0_h, in0_h
|
||||
VPAND mask, in0, in0
|
||||
VPSHUFB in0_h, high_tbl, in0_h
|
||||
VPSHUFB in0, low_tbl, in0
|
||||
VPXOR in0, in0_h, in0
|
||||
VPXOR -32(out)(len*1), in0, in0
|
||||
VMOVDQU in0, -32(out)(len*1)
|
||||
SUBQ $32, len
|
||||
SUBQ $32, tmp0
|
||||
JG loop32b
|
||||
CMPQ len, $256
|
||||
JGE aligned
|
||||
VZEROUPPER
|
||||
RET
|
||||
|
||||
one16b:
|
||||
VMOVDQU -16(in)(len*1), in0x
|
||||
VPSRLQ $4, in0x, in0_hx
|
||||
VPAND maskx, in0x, in0x
|
||||
VPAND maskx, in0_hx, in0_hx
|
||||
VPSHUFB in0_hx, high_tblx, in0_hx
|
||||
VPSHUFB in0x, low_tblx, in0x
|
||||
VPXOR in0x, in0_hx, in0x
|
||||
VPXOR -16(out)(len*1), in0x, in0x
|
||||
VMOVDQU in0x, -16(out)(len*1)
|
||||
SUBQ $16, len
|
||||
CMPQ len, $0
|
||||
JNE ymm
|
||||
RET
|
||||
|
||||
// func mulVectSSSE3(tbl, d, p []byte)
// mulVectSSSE3 transforms the input slice (second argument) 16 bytes at a
// time and writes the result to the output slice (third argument). Each input
// byte is split into its low and high nibble; the low nibble indexes the first
// 16 bytes of tbl, the high nibble the next 16 bytes, and the two looked-up
// bytes are XORed together.
// NOTE(review): presumably tbl holds the GF(2^8) product tables for one
// coefficient (see the paper referenced at the top of this file) - confirm
// against the Go caller.
// NOTE(review): the loop is bottom-tested, so the body runs once before the
// chunk count is checked; an input shorter than 16 bytes is not handled here.
|
||||
TEXT ·mulVectSSSE3(SB), NOSPLIT, $0
|
||||
MOVQ i+24(FP), in // data pointer of the second slice argument
|
||||
MOVQ o+48(FP), out // data pointer of the third slice argument
|
||||
MOVQ tbl+0(FP), tmp0
|
||||
MOVOU (tmp0), low_tblx // table bytes 0..15: lookup for low nibbles
|
||||
MOVOU 16(tmp0), high_tblx // table bytes 16..31: lookup for high nibbles
|
||||
MOVB $15, tmp0 // only the low byte of tmp0 matters below
|
||||
MOVQ tmp0, maskx
|
||||
PXOR tmp0x, tmp0x // all-zero shuffle index
|
||||
PSHUFB tmp0x, maskx // replicate byte 0 (0x0f) into every lane of maskx
|
||||
MOVQ in_len+32(FP), len
|
||||
SHRQ $4, len // len = number of 16-byte chunks
|
||||
|
||||
|
||||
loop:
|
||||
MOVOU (in), in0x
|
||||
MOVOU in0x, in0_hx
|
||||
PSRLQ $4, in0_hx // shift is per 64-bit lane; the mask below drops bits that crossed bytes
|
||||
PAND maskx, in0x // low nibble of every byte
|
||||
PAND maskx, in0_hx // high nibble of every byte
|
||||
MOVOU low_tblx, tmp1x // PSHUFB overwrites its destination, so work on copies
|
||||
MOVOU high_tblx, tmp2x
|
||||
PSHUFB in0x, tmp1x
|
||||
PSHUFB in0_hx, tmp2x
|
||||
PXOR tmp1x, tmp2x
|
||||
MOVOU tmp2x, (out)
|
||||
ADDQ $16, in
|
||||
ADDQ $16, out
|
||||
SUBQ $1, len
|
||||
JNZ loop
|
||||
RET
|
||||
|
||||
// func mulVectAddSSSE3(tbl, d, p []byte)
// mulVectAddSSSE3 performs the same nibble-table transform as mulVectSSSE3 on
// the input slice (second argument), but XORs each 16-byte result into the
// bytes already present in the output slice (third argument) instead of
// overwriting them.
// NOTE(review): the loop is bottom-tested, so the body runs once before the
// chunk count is checked; an input shorter than 16 bytes is not handled here.
|
||||
TEXT ·mulVectAddSSSE3(SB), NOSPLIT, $0
|
||||
MOVQ i+24(FP), in // data pointer of the second slice argument
|
||||
MOVQ o+48(FP), out // data pointer of the third slice argument
|
||||
MOVQ tbl+0(FP), tmp0
|
||||
MOVOU (tmp0), low_tblx // table bytes 0..15: lookup for low nibbles
|
||||
MOVOU 16(tmp0), high_tblx // table bytes 16..31: lookup for high nibbles
|
||||
MOVB $15, tmp0 // only the low byte of tmp0 matters below
|
||||
MOVQ tmp0, maskx
|
||||
PXOR tmp0x, tmp0x // all-zero shuffle index
|
||||
PSHUFB tmp0x, maskx // replicate byte 0 (0x0f) into every lane of maskx
|
||||
MOVQ in_len+32(FP), len
|
||||
SHRQ $4, len // len = number of 16-byte chunks
|
||||
|
||||
|
||||
loop:
|
||||
MOVOU (in), in0x
|
||||
MOVOU in0x, in0_hx
|
||||
PSRLQ $4, in0_hx // shift is per 64-bit lane; the mask below drops bits that crossed bytes
|
||||
PAND maskx, in0x // low nibble of every byte
|
||||
PAND maskx, in0_hx // high nibble of every byte
|
||||
MOVOU low_tblx, tmp1x // PSHUFB overwrites its destination, so work on copies
|
||||
MOVOU high_tblx, tmp2x
|
||||
PSHUFB in0x, tmp1x
|
||||
PSHUFB in0_hx, tmp2x
|
||||
PXOR tmp1x, tmp2x
|
||||
MOVOU (out), tmp3x // current output bytes
|
||||
PXOR tmp3x, tmp2x // accumulate into the existing output
|
||||
MOVOU tmp2x, (out)
|
||||
ADDQ $16, in
|
||||
ADDQ $16, out
|
||||
SUBQ $1, len
|
||||
JNZ loop
|
||||
RET
|
||||
|
||||
// func copy32B(dst, src []byte)
// copy32B copies 32 bytes from the start of src to the start of dst with two
// unaligned 16-byte moves. Neither slice length is read here, so the caller
// must guarantee that both slices hold at least 32 bytes.
|
||||
TEXT ·copy32B(SB), NOSPLIT, $0
|
||||
MOVQ dst+0(FP), SI // SI = dst data pointer
|
||||
MOVQ src+24(FP), DX // DX = src data pointer
|
||||
MOVOU (DX), X0 // bytes 0..15
|
||||
MOVOU 16(DX), X1 // bytes 16..31
|
||||
MOVOU X0, (SI)
|
||||
MOVOU X1, 16(SI)
|
||||
RET
|
||||
|
8
vendor/github.com/templexxx/reedsolomon/rs_other.go
generated
vendored
Normal file
8
vendor/github.com/templexxx/reedsolomon/rs_other.go
generated
vendored
Normal file
@ -0,0 +1,8 @@
|
||||
// +build !amd64
|
||||
|
||||
package reedsolomon
|
||||
|
||||
func newRS(d, p int, em matrix) (enc Encoder) {
|
||||
g := em[d*d:]
|
||||
return &encBase{data: d, parity: p, encode: em, gen: g}
|
||||
}
|
44
vendor/github.com/templexxx/reedsolomon/tbl.go
generated
vendored
Normal file
44
vendor/github.com/templexxx/reedsolomon/tbl.go
generated
vendored
Normal file
File diff suppressed because one or more lines are too long
1
vendor/github.com/templexxx/xor/.gitattributes
generated
vendored
Normal file
1
vendor/github.com/templexxx/xor/.gitattributes
generated
vendored
Normal file
@ -0,0 +1 @@
|
||||
*.s linguist-language=go
|
18
vendor/github.com/templexxx/xor/.gitignore
generated
vendored
Normal file
18
vendor/github.com/templexxx/xor/.gitignore
generated
vendored
Normal file
@ -0,0 +1,18 @@
|
||||
# Binaries for programs and plugins
|
||||
*.exe
|
||||
*.dll
|
||||
*.so
|
||||
*.dylib
|
||||
|
||||
# Test binary, build with `go test -c`
|
||||
*.test
|
||||
|
||||
# Output of the go coverage tool, specifically when used with LiteIDE
|
||||
*.out
|
||||
|
||||
# Project-local glide cache, RE: https://github.com/Masterminds/glide/issues/736
|
||||
.glide/
|
||||
/backup/
|
||||
/backup2/
|
||||
/.idea
|
||||
/backup3/
|
21
vendor/github.com/templexxx/xor/LICENSE
generated
vendored
Normal file
21
vendor/github.com/templexxx/xor/LICENSE
generated
vendored
Normal file
@ -0,0 +1,21 @@
|
||||
MIT License
|
||||
|
||||
Copyright (c) 2017 Temple3x
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
48
vendor/github.com/templexxx/xor/README.md
generated
vendored
Normal file
48
vendor/github.com/templexxx/xor/README.md
generated
vendored
Normal file
@ -0,0 +1,48 @@
|
||||
# XOR
|
||||
|
||||
XOR code engine in pure Go
|
||||
|
||||
More than 10 GB/s per core.
|
||||
|
||||
## Introduction:
|
||||
|
||||
1. Use SIMD (SSE2 or AVX2) for speeding up
|
||||
2. ...
|
||||
|
||||
## Installation
|
||||
To get the package use the standard:
|
||||
```bash
|
||||
go get github.com/templexxx/xor
|
||||
```
|
||||
|
||||
## Documentation
|
||||
|
||||
See the associated [GoDoc](http://godoc.org/github.com/templexxx/xor)
|
||||
|
||||
|
||||
## Performance
|
||||
|
||||
Performance depends mainly on:
|
||||
|
||||
1. SIMD extension
|
||||
2. unit size of worker
|
||||
3. hardware (CPU, RAM, etc.)
|
||||
|
||||
Example of performance on my mid-2014 MacBook (i5-4278U 2.6GHz, 2 physical cores), with shard sizes from 1KB to 16MB:
|
||||
```
|
||||
speed = ( shards * size ) / cost
|
||||
```
|
||||
| data_shards | shard_size |speed (MB/S) |
|
||||
|----------|----|-----|
|
||||
| 2 |1KB|64127.95 |
|
||||
|2|1400B|59657.55|
|
||||
|2|16KB|35370.84|
|
||||
| 2 | 16MB|12128.95 |
|
||||
| 5 |1KB| 78837.33 |
|
||||
|5|1400B|58054.89|
|
||||
|5|16KB|50161.19|
|
||||
|5| 16MB|12750.41|
|
||||
|
||||
## Who is using this?
|
||||
|
||||
1. https://github.com/xtaci/kcp-go -- A Production-Grade Reliable-UDP Library for golang
|
438
vendor/github.com/templexxx/xor/avx2_amd64.s
generated
vendored
Normal file
438
vendor/github.com/templexxx/xor/avx2_amd64.s
generated
vendored
Normal file
@ -0,0 +1,438 @@
|
||||
#include "textflag.h"
|
||||
|
||||
// addr of mem
|
||||
#define DST BX
|
||||
#define SRC SI
|
||||
#define SRC0 TMP4
|
||||
#define SRC1 TMP5
|
||||
|
||||
// loop args
|
||||
// num of vect
|
||||
#define VECT CX
|
||||
#define LEN DX
|
||||
// pos of matrix
|
||||
#define POS R8
|
||||
|
||||
// tmp store
|
||||
// num of vect or ...
|
||||
#define TMP1 R9
|
||||
// pos of matrix or ...
|
||||
#define TMP2 R10
|
||||
// store addr of data/parity or ...
|
||||
#define TMP3 R11
|
||||
#define TMP4 R12
|
||||
#define TMP5 R13
|
||||
#define TMP6 R14
|
||||
|
||||
// func bytesAVX2mini(dst, src0, src1 []byte, size int)
// bytesAVX2mini writes src0 XOR src1 into dst for the first size bytes.
// A size that is a multiple of 32 goes straight to the 32-byte AVX2 loop.
// Otherwise the tail is peeled off from the end of the buffers first: single
// bytes until the remaining length is a multiple of 8, then 8-byte words
// until it is a multiple of 32, and whatever is left (if at least 32 bytes)
// is handled by the AVX2 loop starting from offset 0.
|
||||
TEXT ·bytesAVX2mini(SB), NOSPLIT, $0
|
||||
MOVQ len+72(FP), LEN // fourth argument (size)
|
||||
CMPQ LEN, $0
|
||||
JE ret
|
||||
MOVQ dst+0(FP), DST
|
||||
MOVQ src0+24(FP), SRC0
|
||||
MOVQ src1+48(FP), SRC1
|
||||
TESTQ $31, LEN
|
||||
JNZ not_aligned
|
||||
|
||||
|
||||
aligned:
|
||||
MOVQ $0, POS
|
||||
|
||||
|
||||
loop32b:
|
||||
VMOVDQU (SRC0)(POS*1), Y0
|
||||
VPXOR (SRC1)(POS*1), Y0, Y0
|
||||
VMOVDQU Y0, (DST)(POS*1)
|
||||
ADDQ $32, POS
|
||||
CMPQ LEN, POS
|
||||
JNE loop32b
|
||||
VZEROUPPER
|
||||
RET
|
||||
|
||||
|
||||
loop_1b:
|
||||
MOVB -1(SRC0)(LEN*1), TMP1 // last unprocessed byte
|
||||
MOVB -1(SRC1)(LEN*1), TMP2
|
||||
XORB TMP1, TMP2
|
||||
MOVB TMP2, -1(DST)(LEN*1)
|
||||
SUBQ $1, LEN
|
||||
TESTQ $7, LEN // keep going until LEN is a multiple of 8
|
||||
JNZ loop_1b
|
||||
CMPQ LEN, $0
|
||||
JE ret
|
||||
TESTQ $31, LEN
|
||||
JZ aligned // otherwise fall through to the 8-byte tail loop
|
||||
|
||||
|
||||
not_aligned:
|
||||
TESTQ $7, LEN
|
||||
JNE loop_1b
|
||||
MOVQ LEN, TMP1
|
||||
ANDQ $31, TMP1 // TMP1 = bytes to peel off in 8-byte steps
|
||||
|
||||
|
||||
loop_8b:
|
||||
MOVQ -8(SRC0)(LEN*1), TMP2
|
||||
MOVQ -8(SRC1)(LEN*1), TMP3
|
||||
XORQ TMP2, TMP3
|
||||
MOVQ TMP3, -8(DST)(LEN*1)
|
||||
SUBQ $8, LEN
|
||||
SUBQ $8, TMP1
|
||||
JG loop_8b
|
||||
|
||||
|
||||
CMPQ LEN, $32
|
||||
JGE aligned
|
||||
RET // no YMM register was touched on this path
|
||||
|
||||
|
||||
ret:
|
||||
RET
|
||||
|
||||
// func bytesAVX2small(dst, src0, src1 []byte, size int)
|
||||
TEXT ·bytesAVX2small(SB), NOSPLIT, $0
|
||||
MOVQ len+72(FP), LEN
|
||||
CMPQ LEN, $0
|
||||
JE ret
|
||||
MOVQ dst+0(FP), DST
|
||||
MOVQ src0+24(FP), SRC0
|
||||
MOVQ src1+48(FP), SRC1
|
||||
TESTQ $127, LEN
|
||||
JNZ not_aligned
|
||||
|
||||
aligned:
|
||||
MOVQ $0, POS
|
||||
|
||||
loop128b:
|
||||
VMOVDQU (SRC0)(POS*1), Y0
|
||||
VMOVDQU 32(SRC0)(POS*1), Y1
|
||||
VMOVDQU 64(SRC0)(POS*1), Y2
|
||||
VMOVDQU 96(SRC0)(POS*1), Y3
|
||||
VPXOR (SRC1)(POS*1), Y0, Y0
|
||||
VPXOR 32(SRC1)(POS*1), Y1, Y1
|
||||
VPXOR 64(SRC1)(POS*1), Y2, Y2
|
||||
VPXOR 96(SRC1)(POS*1), Y3, Y3
|
||||
VMOVDQU Y0, (DST)(POS*1)
|
||||
VMOVDQU Y1, 32(DST)(POS*1)
|
||||
VMOVDQU Y2, 64(DST)(POS*1)
|
||||
VMOVDQU Y3, 96(DST)(POS*1)
|
||||
|
||||
ADDQ $128, POS
|
||||
CMPQ LEN, POS
|
||||
JNE loop128b
|
||||
VZEROUPPER
|
||||
RET
|
||||
|
||||
loop_1b:
|
||||
MOVB -1(SRC0)(LEN*1), TMP1
|
||||
MOVB -1(SRC1)(LEN*1), TMP2
|
||||
XORB TMP1, TMP2
|
||||
MOVB TMP2, -1(DST)(LEN*1)
|
||||
SUBQ $1, LEN
|
||||
TESTQ $7, LEN
|
||||
JNZ loop_1b
|
||||
CMPQ LEN, $0
|
||||
JE ret
|
||||
TESTQ $127, LEN
|
||||
JZ aligned
|
||||
|
||||
not_aligned:
|
||||
TESTQ $7, LEN
|
||||
JNE loop_1b
|
||||
MOVQ LEN, TMP1
|
||||
ANDQ $127, TMP1
|
||||
|
||||
loop_8b:
|
||||
MOVQ -8(SRC0)(LEN*1), TMP2
|
||||
MOVQ -8(SRC1)(LEN*1), TMP3
|
||||
XORQ TMP2, TMP3
|
||||
MOVQ TMP3, -8(DST)(LEN*1)
|
||||
SUBQ $8, LEN
|
||||
SUBQ $8, TMP1
|
||||
JG loop_8b
|
||||
|
||||
CMPQ LEN, $128
|
||||
JGE aligned
|
||||
RET
|
||||
|
||||
ret:
|
||||
RET
|
||||
|
||||
// func bytesAVX2big(dst, src0, src1 []byte, size int)
|
||||
TEXT ·bytesAVX2big(SB), NOSPLIT, $0
|
||||
MOVQ len+72(FP), LEN
|
||||
CMPQ LEN, $0
|
||||
JE ret
|
||||
MOVQ dst+0(FP), DST
|
||||
MOVQ src0+24(FP), SRC0
|
||||
MOVQ src1+48(FP), SRC1
|
||||
TESTQ $127, LEN
|
||||
JNZ not_aligned
|
||||
|
||||
aligned:
|
||||
MOVQ $0, POS
|
||||
|
||||
loop128b:
|
||||
VMOVDQU (SRC0)(POS*1), Y0
|
||||
VMOVDQU 32(SRC0)(POS*1), Y1
|
||||
VMOVDQU 64(SRC0)(POS*1), Y2
|
||||
VMOVDQU 96(SRC0)(POS*1), Y3
|
||||
VPXOR (SRC1)(POS*1), Y0, Y0
|
||||
VPXOR 32(SRC1)(POS*1), Y1, Y1
|
||||
VPXOR 64(SRC1)(POS*1), Y2, Y2
|
||||
VPXOR 96(SRC1)(POS*1), Y3, Y3
|
||||
LONG $0xe77da1c4; WORD $0x0304
|
||||
LONG $0xe77da1c4; WORD $0x034c; BYTE $0x20
|
||||
LONG $0xe77da1c4; WORD $0x0354; BYTE $0x40
|
||||
LONG $0xe77da1c4; WORD $0x035c; BYTE $0x60
|
||||
|
||||
ADDQ $128, POS
|
||||
CMPQ LEN, POS
|
||||
JNE loop128b
|
||||
SFENCE
|
||||
VZEROUPPER
|
||||
RET
|
||||
|
||||
loop_1b:
|
||||
MOVB -1(SRC0)(LEN*1), TMP1
|
||||
MOVB -1(SRC1)(LEN*1), TMP2
|
||||
XORB TMP1, TMP2
|
||||
MOVB TMP2, -1(DST)(LEN*1)
|
||||
SUBQ $1, LEN
|
||||
TESTQ $7, LEN
|
||||
JNZ loop_1b
|
||||
CMPQ LEN, $0
|
||||
JE ret
|
||||
TESTQ $127, LEN
|
||||
JZ aligned
|
||||
|
||||
not_aligned:
|
||||
TESTQ $7, LEN
|
||||
JNE loop_1b
|
||||
MOVQ LEN, TMP1
|
||||
ANDQ $127, TMP1
|
||||
|
||||
loop_8b:
|
||||
MOVQ -8(SRC0)(LEN*1), TMP2
|
||||
MOVQ -8(SRC1)(LEN*1), TMP3
|
||||
XORQ TMP2, TMP3
|
||||
MOVQ TMP3, -8(DST)(LEN*1)
|
||||
SUBQ $8, LEN
|
||||
SUBQ $8, TMP1
|
||||
JG loop_8b
|
||||
|
||||
CMPQ LEN, $128
|
||||
JGE aligned
|
||||
RET
|
||||
|
||||
ret:
|
||||
RET
|
||||
|
||||
// func matrixAVX2small(dst []byte, src [][]byte)
|
||||
TEXT ·matrixAVX2small(SB), NOSPLIT, $0
|
||||
MOVQ dst+0(FP), DST
|
||||
MOVQ src+24(FP), SRC
|
||||
MOVQ vec+32(FP), VECT
|
||||
MOVQ len+8(FP), LEN
|
||||
TESTQ $127, LEN
|
||||
JNZ not_aligned
|
||||
|
||||
aligned:
|
||||
MOVQ $0, POS
|
||||
|
||||
loop128b:
|
||||
MOVQ VECT, TMP1
|
||||
SUBQ $2, TMP1
|
||||
MOVQ $0, TMP2
|
||||
MOVQ (SRC)(TMP2*1), TMP3
|
||||
MOVQ TMP3, TMP4
|
||||
VMOVDQU (TMP3)(POS*1), Y0
|
||||
VMOVDQU 32(TMP4)(POS*1), Y1
|
||||
VMOVDQU 64(TMP3)(POS*1), Y2
|
||||
VMOVDQU 96(TMP4)(POS*1), Y3
|
||||
|
||||
next_vect:
|
||||
ADDQ $24, TMP2
|
||||
MOVQ (SRC)(TMP2*1), TMP3
|
||||
MOVQ TMP3, TMP4
|
||||
VMOVDQU (TMP3)(POS*1), Y4
|
||||
VMOVDQU 32(TMP4)(POS*1), Y5
|
||||
VMOVDQU 64(TMP3)(POS*1), Y6
|
||||
VMOVDQU 96(TMP4)(POS*1), Y7
|
||||
VPXOR Y4, Y0, Y0
|
||||
VPXOR Y5, Y1, Y1
|
||||
VPXOR Y6, Y2, Y2
|
||||
VPXOR Y7, Y3, Y3
|
||||
SUBQ $1, TMP1
|
||||
JGE next_vect
|
||||
|
||||
VMOVDQU Y0, (DST)(POS*1)
|
||||
VMOVDQU Y1, 32(DST)(POS*1)
|
||||
VMOVDQU Y2, 64(DST)(POS*1)
|
||||
VMOVDQU Y3, 96(DST)(POS*1)
|
||||
|
||||
ADDQ $128, POS
|
||||
CMPQ LEN, POS
|
||||
JNE loop128b
|
||||
VZEROUPPER
|
||||
RET
|
||||
|
||||
loop_1b:
|
||||
MOVQ VECT, TMP1
|
||||
MOVQ $0, TMP2
|
||||
MOVQ (SRC)(TMP2*1), TMP3
|
||||
SUBQ $2, TMP1
|
||||
MOVB -1(TMP3)(LEN*1), TMP5
|
||||
|
||||
next_vect_1b:
|
||||
ADDQ $24, TMP2
|
||||
MOVQ (SRC)(TMP2*1), TMP3
|
||||
MOVB -1(TMP3)(LEN*1), TMP6
|
||||
XORB TMP6, TMP5
|
||||
SUBQ $1, TMP1
|
||||
JGE next_vect_1b
|
||||
|
||||
MOVB TMP5, -1(DST)(LEN*1)
|
||||
SUBQ $1, LEN
|
||||
TESTQ $7, LEN
|
||||
JNZ loop_1b
|
||||
|
||||
CMPQ LEN, $0
|
||||
JE ret
|
||||
TESTQ $127, LEN
|
||||
JZ aligned
|
||||
|
||||
not_aligned:
|
||||
TESTQ $7, LEN
|
||||
JNE loop_1b
|
||||
MOVQ LEN, TMP4
|
||||
ANDQ $127, TMP4
|
||||
|
||||
loop_8b:
|
||||
MOVQ VECT, TMP1
|
||||
MOVQ $0, TMP2
|
||||
MOVQ (SRC)(TMP2*1), TMP3
|
||||
SUBQ $2, TMP1
|
||||
MOVQ -8(TMP3)(LEN*1), TMP5
|
||||
|
||||
next_vect_8b:
|
||||
ADDQ $24, TMP2
|
||||
MOVQ (SRC)(TMP2*1), TMP3
|
||||
MOVQ -8(TMP3)(LEN*1), TMP6
|
||||
XORQ TMP6, TMP5
|
||||
SUBQ $1, TMP1
|
||||
JGE next_vect_8b
|
||||
|
||||
MOVQ TMP5, -8(DST)(LEN*1)
|
||||
SUBQ $8, LEN
|
||||
SUBQ $8, TMP4
|
||||
JG loop_8b
|
||||
|
||||
CMPQ LEN, $128
|
||||
JGE aligned
|
||||
RET
|
||||
|
||||
ret:
|
||||
RET
|
||||
|
||||
// func matrixAVX2big(dst []byte, src [][]byte)
// matrixAVX2big XORs all vectors of src together and writes the result to
// dst, over len(dst) bytes. The bulk is processed 128 bytes at a time and
// written with non-temporal stores; a length that is not a multiple of 128 has
// its tail peeled off from the end first (single bytes until the remainder is
// a multiple of 8, then 8-byte words).
// The inner loops are bottom-tested and start from src[0] XOR src[1], so src
// must hold at least two vectors.
// NOTE(review): every vector in src is presumably at least len(dst) bytes
// long - confirm against the Go caller.
TEXT ·matrixAVX2big(SB), NOSPLIT, $0
	MOVQ  dst+0(FP), DST
	MOVQ  src+24(FP), SRC
	MOVQ  vec+32(FP), VECT  // len(src): number of vectors
	MOVQ  len+8(FP), LEN    // len(dst)
	TESTQ $127, LEN
	JNZ   not_aligned

aligned:
	MOVQ $0, POS

loop128b:
	MOVQ    VECT, TMP1
	SUBQ    $2, TMP1              // next_vect runs VECT-1 times
	MOVQ    $0, TMP2              // byte offset of the current slice header in src
	MOVQ    (SRC)(TMP2*1), TMP3   // data pointer of src[0]
	MOVQ    TMP3, TMP4
	VMOVDQU (TMP3)(POS*1), Y0
	VMOVDQU 32(TMP4)(POS*1), Y1
	VMOVDQU 64(TMP3)(POS*1), Y2
	VMOVDQU 96(TMP4)(POS*1), Y3

next_vect:
	ADDQ    $24, TMP2             // a slice header is 24 bytes
	MOVQ    (SRC)(TMP2*1), TMP3
	MOVQ    TMP3, TMP4
	VMOVDQU (TMP3)(POS*1), Y4
	VMOVDQU 32(TMP4)(POS*1), Y5
	VMOVDQU 64(TMP3)(POS*1), Y6
	VMOVDQU 96(TMP4)(POS*1), Y7
	VPXOR   Y4, Y0, Y0
	VPXOR   Y5, Y1, Y1
	VPXOR   Y6, Y2, Y2
	VPXOR   Y7, Y3, Y3
	SUBQ    $1, TMP1
	JGE     next_vect

	// Hand-encoded VMOVNTDQ (non-temporal store), presumably because the Go
	// assembler targeted at the time had no mnemonic for it.
	LONG $0xe77da1c4; WORD $0x0304             // VMOVNTDQ Y0, (DST)(POS*1)
	LONG $0xe77da1c4; WORD $0x034c; BYTE $0x20 // VMOVNTDQ Y1, 32(DST)(POS*1)
	LONG $0xe77da1c4; WORD $0x0354; BYTE $0x40 // VMOVNTDQ Y2, 64(DST)(POS*1)
	LONG $0xe77da1c4; WORD $0x035c; BYTE $0x60 // VMOVNTDQ Y3, 96(DST)(POS*1)

	ADDQ $128, POS
	CMPQ LEN, POS
	JNE  loop128b

	// Non-temporal stores are weakly ordered; fence them before returning so
	// later stores cannot become visible first (bytesAVX2big does the same).
	SFENCE
	VZEROUPPER
	RET

loop_1b:
	MOVQ VECT, TMP1
	MOVQ $0, TMP2
	MOVQ (SRC)(TMP2*1), TMP3
	SUBQ $2, TMP1
	MOVB -1(TMP3)(LEN*1), TMP5    // last unprocessed byte of src[0]

next_vect_1b:
	ADDQ $24, TMP2
	MOVQ (SRC)(TMP2*1), TMP3
	MOVB -1(TMP3)(LEN*1), TMP6
	XORB TMP6, TMP5
	SUBQ $1, TMP1
	JGE  next_vect_1b

	MOVB  TMP5, -1(DST)(LEN*1)
	SUBQ  $1, LEN
	TESTQ $7, LEN                 // keep going until LEN is a multiple of 8
	JNZ   loop_1b

	CMPQ  LEN, $0
	JE    ret
	TESTQ $127, LEN
	JZ    aligned                 // otherwise fall through to the 8-byte tail loop

not_aligned:
	TESTQ $7, LEN
	JNE   loop_1b
	MOVQ  LEN, TMP4
	ANDQ  $127, TMP4              // TMP4 = bytes to peel off in 8-byte steps

loop_8b:
	MOVQ VECT, TMP1
	MOVQ $0, TMP2
	MOVQ (SRC)(TMP2*1), TMP3
	SUBQ $2, TMP1
	MOVQ -8(TMP3)(LEN*1), TMP5

next_vect_8b:
	ADDQ $24, TMP2
	MOVQ (SRC)(TMP2*1), TMP3
	MOVQ -8(TMP3)(LEN*1), TMP6
	XORQ TMP6, TMP5
	SUBQ $1, TMP1
	JGE  next_vect_8b

	MOVQ TMP5, -8(DST)(LEN*1)
	SUBQ $8, LEN
	SUBQ $8, TMP4
	JG   loop_8b

	CMPQ LEN, $128
	JGE  aligned
	RET

ret:
	RET
|
||||
|
116
vendor/github.com/templexxx/xor/nosimd.go
generated
vendored
Normal file
116
vendor/github.com/templexxx/xor/nosimd.go
generated
vendored
Normal file
@ -0,0 +1,116 @@
|
||||
// Copyright 2013 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package xor
|
||||
|
||||
import (
|
||||
"runtime"
|
||||
"unsafe"
|
||||
)
|
||||
|
||||
const wordSize = int(unsafe.Sizeof(uintptr(0)))
|
||||
const supportsUnaligned = runtime.GOARCH == "386" || runtime.GOARCH == "amd64" || runtime.GOARCH == "ppc64" || runtime.GOARCH == "ppc64le" || runtime.GOARCH == "s390x"
|
||||
|
||||
// xor the bytes in a and b. The destination is assumed to have enough space.
|
||||
func bytesNoSIMD(dst, a, b []byte, size int) {
|
||||
if supportsUnaligned {
|
||||
fastXORBytes(dst, a, b, size)
|
||||
} else {
|
||||
// TODO(hanwen): if (dst, a, b) have common alignment
|
||||
// we could still try fastXORBytes. It is not clear
|
||||
// how often this happens, and it's only worth it if
|
||||
// the block encryption itself is hardware
|
||||
// accelerated.
|
||||
safeXORBytes(dst, a, b, size)
|
||||
}
|
||||
}
|
||||
|
||||
// unitSize is the chunk length slices are split into so that each chunk stays cache-friendly.
|
||||
const unitSize = 16 * 1024
|
||||
|
||||
func matrixNoSIMD(dst []byte, src [][]byte) {
|
||||
size := len(src[0])
|
||||
start := 0
|
||||
do := unitSize
|
||||
for start < size {
|
||||
end := start + do
|
||||
if end <= size {
|
||||
partNoSIMD(start, end, dst, src)
|
||||
start = start + do
|
||||
} else {
|
||||
partNoSIMD(start, size, dst, src)
|
||||
start = size
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// split vect will improve performance with big data by reducing cache pollution
|
||||
func partNoSIMD(start, end int, dst []byte, src [][]byte) {
|
||||
bytesNoSIMD(dst[start:end], src[0][start:end], src[1][start:end], end-start)
|
||||
for i := 2; i < len(src); i++ {
|
||||
bytesNoSIMD(dst[start:end], dst[start:end], src[i][start:end], end-start)
|
||||
}
|
||||
}
|
||||
|
||||
// fastXORBytes xor in bulk. It only works on architectures that
|
||||
// support unaligned read/writes.
|
||||
func fastXORBytes(dst, a, b []byte, n int) {
|
||||
w := n / wordSize
|
||||
if w > 0 {
|
||||
wordBytes := w * wordSize
|
||||
fastXORWords(dst[:wordBytes], a[:wordBytes], b[:wordBytes])
|
||||
}
|
||||
for i := n - n%wordSize; i < n; i++ {
|
||||
dst[i] = a[i] ^ b[i]
|
||||
}
|
||||
}
|
||||
|
||||
// safeXORBytes XORs the first n bytes of a and b into dst one byte at a time,
// making no assumption about alignment. The first n%8 bytes are handled
// individually so that the rest can be processed in groups of eight.
func safeXORBytes(dst, a, b []byte, n int) {
	head := n % 8
	for i := 0; i < head; i++ {
		dst[i] = a[i] ^ b[i]
	}

	for base := head; base < n; base += 8 {
		// Fixed-size windows over the three slices.
		d, x, y := dst[base:base+8], a[base:base+8], b[base:base+8]
		for j := range d {
			d[j] = x[j] ^ y[j]
		}
	}
}
|
||||
|
||||
// fastXORWords XORs multiples of 4 or 8 bytes (depending on architecture.)
|
||||
// The arguments are assumed to be of equal length.
|
||||
func fastXORWords(dst, a, b []byte) {
|
||||
dw := *(*[]uintptr)(unsafe.Pointer(&dst))
|
||||
aw := *(*[]uintptr)(unsafe.Pointer(&a))
|
||||
bw := *(*[]uintptr)(unsafe.Pointer(&b))
|
||||
n := len(b) / wordSize
|
||||
ex := n % 8
|
||||
for i := 0; i < ex; i++ {
|
||||
dw[i] = aw[i] ^ bw[i]
|
||||
}
|
||||
|
||||
for i := ex; i < n; i += 8 {
|
||||
_dw := dw[i : i+8]
|
||||
_aw := aw[i : i+8]
|
||||
_bw := bw[i : i+8]
|
||||
_dw[0] = _aw[0] ^ _bw[0]
|
||||
_dw[1] = _aw[1] ^ _bw[1]
|
||||
_dw[2] = _aw[2] ^ _bw[2]
|
||||
_dw[3] = _aw[3] ^ _bw[3]
|
||||
_dw[4] = _aw[4] ^ _bw[4]
|
||||
_dw[5] = _aw[5] ^ _bw[5]
|
||||
_dw[6] = _aw[6] ^ _bw[6]
|
||||
_dw[7] = _aw[7] ^ _bw[7]
|
||||
}
|
||||
}
|
574
vendor/github.com/templexxx/xor/sse2_amd64.s
generated
vendored
Normal file
574
vendor/github.com/templexxx/xor/sse2_amd64.s
generated
vendored
Normal file
@ -0,0 +1,574 @@
|
||||
#include "textflag.h"
|
||||
|
||||
// addr of mem
|
||||
#define DST BX
|
||||
#define SRC SI
|
||||
#define SRC0 TMP4
|
||||
#define SRC1 TMP5
|
||||
|
||||
// loop args
|
||||
// num of vect
|
||||
#define VECT CX
|
||||
#define LEN DX
|
||||
// pos of matrix
|
||||
#define POS R8
|
||||
|
||||
// tmp store
|
||||
// num of vect or ...
|
||||
#define TMP1 R9
|
||||
// pos of matrix or ...
|
||||
#define TMP2 R10
|
||||
// store addr of data/parity or ...
|
||||
#define TMP3 R11
|
||||
#define TMP4 R12
|
||||
#define TMP5 R13
|
||||
#define TMP6 R14
|
||||
|
||||
// func xorSrc0(dst, src0, src1 []byte)
//
// XORs len(src0) bytes of src0 and src1 into dst.
// The tail is processed first, working backwards from the end: single bytes
// until the remaining length is a multiple of 8, then one 8-byte word if the
// length is still not a multiple of 16. The remaining prefix is processed
// forwards from offset 0 in 16-byte steps.
//
// Both sources are loaded with MOVOU and combined with PXOR: a legacy-SSE
// instruction with a 128-bit memory operand (such as XORPD mem, X0) raises
// #GP unless that operand is 16-byte aligned, and src1 may be any slice.
TEXT ·xorSrc0(SB), NOSPLIT, $0
	MOVQ  src0_len+32(FP), LEN
	CMPQ  LEN, $0
	JE    ret
	MOVQ  dst_base+0(FP), DST
	MOVQ  src0_base+24(FP), SRC0
	MOVQ  src1_base+48(FP), SRC1
	TESTQ $15, LEN
	JNZ   not_aligned

aligned:
	MOVQ $0, POS

loop16b:
	MOVOU (SRC0)(POS*1), X0
	MOVOU (SRC1)(POS*1), X4
	PXOR  X4, X0
	MOVOU X0, (DST)(POS*1)
	ADDQ  $16, POS
	CMPQ  LEN, POS
	JNE   loop16b
	RET

loop_1b:
	// XOR the last unprocessed byte, then shrink LEN by one.
	MOVB  -1(SRC0)(LEN*1), TMP1
	MOVB  -1(SRC1)(LEN*1), TMP2
	XORB  TMP1, TMP2
	MOVB  TMP2, -1(DST)(LEN*1)
	SUBQ  $1, LEN
	TESTQ $7, LEN
	JNZ   loop_1b
	CMPQ  LEN, $0
	JE    ret
	TESTQ $15, LEN
	JZ    aligned

not_aligned:
	TESTQ $7, LEN
	JNE   loop_1b
	MOVQ  LEN, TMP1
	ANDQ  $15, TMP1 // bytes to peel off in 8-byte words

loop_8b:
	MOVQ -8(SRC0)(LEN*1), TMP2
	MOVQ -8(SRC1)(LEN*1), TMP3
	XORQ TMP2, TMP3
	MOVQ TMP3, -8(DST)(LEN*1)
	SUBQ $8, LEN
	SUBQ $8, TMP1
	JG   loop_8b

	CMPQ LEN, $16
	JGE  aligned
	RET

ret:
	RET
|
||||
|
||||
// func xorSrc1(dst, src0, src1 []byte)
//
// XORs len(src1) bytes of src0 and src1 into dst.
// The tail is processed first, working backwards from the end: single bytes
// until the remaining length is a multiple of 8, then one 8-byte word if the
// length is still not a multiple of 16. The remaining prefix is processed
// forwards from offset 0 in 16-byte steps.
//
// Both sources are loaded with MOVOU and combined with PXOR: a legacy-SSE
// instruction with a 128-bit memory operand (such as XORPD mem, X0) raises
// #GP unless that operand is 16-byte aligned, and src1 may be any slice.
TEXT ·xorSrc1(SB), NOSPLIT, $0
	MOVQ  src1_len+56(FP), LEN
	CMPQ  LEN, $0
	JE    ret
	MOVQ  dst_base+0(FP), DST
	MOVQ  src0_base+24(FP), SRC0
	MOVQ  src1_base+48(FP), SRC1
	TESTQ $15, LEN
	JNZ   not_aligned

aligned:
	MOVQ $0, POS

loop16b:
	MOVOU (SRC0)(POS*1), X0
	MOVOU (SRC1)(POS*1), X4
	PXOR  X4, X0
	MOVOU X0, (DST)(POS*1)
	ADDQ  $16, POS
	CMPQ  LEN, POS
	JNE   loop16b
	RET

loop_1b:
	// XOR the last unprocessed byte, then shrink LEN by one.
	MOVB  -1(SRC0)(LEN*1), TMP1
	MOVB  -1(SRC1)(LEN*1), TMP2
	XORB  TMP1, TMP2
	MOVB  TMP2, -1(DST)(LEN*1)
	SUBQ  $1, LEN
	TESTQ $7, LEN
	JNZ   loop_1b
	CMPQ  LEN, $0
	JE    ret
	TESTQ $15, LEN
	JZ    aligned

not_aligned:
	TESTQ $7, LEN
	JNE   loop_1b
	MOVQ  LEN, TMP1
	ANDQ  $15, TMP1 // bytes to peel off in 8-byte words

loop_8b:
	MOVQ -8(SRC0)(LEN*1), TMP2
	MOVQ -8(SRC1)(LEN*1), TMP3
	XORQ TMP2, TMP3
	MOVQ TMP3, -8(DST)(LEN*1)
	SUBQ $8, LEN
	SUBQ $8, TMP1
	JG   loop_8b

	CMPQ LEN, $16
	JGE  aligned
	RET

ret:
	RET
|
||||
|
||||
// func bytesSSE2mini(dst, src0, src1 []byte, size int)
//
// XORs size bytes of src0 and src1 into dst, 16 bytes per main-loop step.
// The tail is processed first, working backwards from the end: single bytes
// until the remaining length is a multiple of 8, then one 8-byte word if the
// length is still not a multiple of 16. The remaining prefix is processed
// forwards from offset 0.
//
// Both sources are loaded with MOVOU and combined with PXOR: a legacy-SSE
// instruction with a 128-bit memory operand (such as XORPD mem, X0) raises
// #GP unless that operand is 16-byte aligned, and src1 may be any slice.
TEXT ·bytesSSE2mini(SB), NOSPLIT, $0
	MOVQ  size+72(FP), LEN
	CMPQ  LEN, $0
	JE    ret
	MOVQ  dst_base+0(FP), DST
	MOVQ  src0_base+24(FP), SRC0
	MOVQ  src1_base+48(FP), SRC1
	TESTQ $15, LEN
	JNZ   not_aligned

aligned:
	MOVQ $0, POS

loop16b:
	MOVOU (SRC0)(POS*1), X0
	MOVOU (SRC1)(POS*1), X4
	PXOR  X4, X0
	MOVOU X0, (DST)(POS*1)
	ADDQ  $16, POS
	CMPQ  LEN, POS
	JNE   loop16b
	RET

loop_1b:
	// XOR the last unprocessed byte, then shrink LEN by one.
	MOVB  -1(SRC0)(LEN*1), TMP1
	MOVB  -1(SRC1)(LEN*1), TMP2
	XORB  TMP1, TMP2
	MOVB  TMP2, -1(DST)(LEN*1)
	SUBQ  $1, LEN
	TESTQ $7, LEN
	JNZ   loop_1b
	CMPQ  LEN, $0
	JE    ret
	TESTQ $15, LEN
	JZ    aligned

not_aligned:
	TESTQ $7, LEN
	JNE   loop_1b
	MOVQ  LEN, TMP1
	ANDQ  $15, TMP1 // bytes to peel off in 8-byte words

loop_8b:
	MOVQ -8(SRC0)(LEN*1), TMP2
	MOVQ -8(SRC1)(LEN*1), TMP3
	XORQ TMP2, TMP3
	MOVQ TMP3, -8(DST)(LEN*1)
	SUBQ $8, LEN
	SUBQ $8, TMP1
	JG   loop_8b

	CMPQ LEN, $16
	JGE  aligned
	RET

ret:
	RET
|
||||
|
||||
// func bytesSSE2small(dst, src0, src1 []byte, size int)
//
// XORs size bytes of src0 and src1 into dst, 64 bytes per main-loop step,
// using unaligned loads and stores (MOVOU) throughout.
// The tail is processed first, working backwards from the end: single bytes
// until the remaining length is a multiple of 8, then 8-byte words until it
// is a multiple of 64. The remaining prefix is processed forwards from
// offset 0.
//
// Argument references use the <name>_base / <name>_len spelling that
// go vet's asmdecl check expects for slice parameters.
TEXT ·bytesSSE2small(SB), NOSPLIT, $0
	MOVQ  size+72(FP), LEN
	CMPQ  LEN, $0
	JE    ret
	MOVQ  dst_base+0(FP), DST
	MOVQ  src0_base+24(FP), SRC0
	MOVQ  src1_base+48(FP), SRC1
	TESTQ $63, LEN
	JNZ   not_aligned

aligned:
	MOVQ $0, POS

loop64b:
	MOVOU (SRC0)(POS*1), X0
	MOVOU 16(SRC0)(POS*1), X1
	MOVOU 32(SRC0)(POS*1), X2
	MOVOU 48(SRC0)(POS*1), X3

	MOVOU (SRC1)(POS*1), X4
	MOVOU 16(SRC1)(POS*1), X5
	MOVOU 32(SRC1)(POS*1), X6
	MOVOU 48(SRC1)(POS*1), X7

	PXOR X4, X0
	PXOR X5, X1
	PXOR X6, X2
	PXOR X7, X3

	MOVOU X0, (DST)(POS*1)
	MOVOU X1, 16(DST)(POS*1)
	MOVOU X2, 32(DST)(POS*1)
	MOVOU X3, 48(DST)(POS*1)

	ADDQ $64, POS
	CMPQ LEN, POS
	JNE  loop64b
	RET

loop_1b:
	// XOR the last unprocessed byte, then shrink LEN by one.
	MOVB  -1(SRC0)(LEN*1), TMP1
	MOVB  -1(SRC1)(LEN*1), TMP2
	XORB  TMP1, TMP2
	MOVB  TMP2, -1(DST)(LEN*1)
	SUBQ  $1, LEN
	TESTQ $7, LEN
	JNZ   loop_1b
	CMPQ  LEN, $0
	JE    ret
	TESTQ $63, LEN
	JZ    aligned

not_aligned:
	TESTQ $7, LEN
	JNE   loop_1b
	MOVQ  LEN, TMP1
	ANDQ  $63, TMP1 // bytes to peel off in 8-byte words

loop_8b:
	MOVQ -8(SRC0)(LEN*1), TMP2
	MOVQ -8(SRC1)(LEN*1), TMP3
	XORQ TMP2, TMP3
	MOVQ TMP3, -8(DST)(LEN*1)
	SUBQ $8, LEN
	SUBQ $8, TMP1
	JG   loop_8b

	CMPQ LEN, $64
	JGE  aligned
	RET

ret:
	RET
|
||||
|
||||
// func bytesSSE2big(dst, src0, src1 []byte, size int)
//
// XORs size bytes of src0 and src1 into dst, 64 bytes per main-loop step.
// The main loop writes with hand-encoded MOVNTDQ (non-temporal) stores.
// The tail is processed first, working backwards from the end: single bytes
// until the remaining length is a multiple of 8, then 8-byte words until it
// is a multiple of 64. The remaining prefix is processed forwards from
// offset 0.
//
// NOTE: MOVNTDQ raises #GP unless its memory operand is 16-byte aligned, so
// the base address of dst must be 16-byte aligned when the main loop runs.
// Non-temporal stores are weakly ordered; an SFENCE is issued after the loop
// so the written data is ordered before any later store.
TEXT ·bytesSSE2big(SB), NOSPLIT, $0
	MOVQ  size+72(FP), LEN
	CMPQ  LEN, $0
	JE    ret
	MOVQ  dst_base+0(FP), DST
	MOVQ  src0_base+24(FP), SRC0
	MOVQ  src1_base+48(FP), SRC1
	TESTQ $63, LEN
	JNZ   not_aligned

aligned:
	MOVQ $0, POS

loop64b:
	MOVOU (SRC0)(POS*1), X0
	MOVOU 16(SRC0)(POS*1), X1
	MOVOU 32(SRC0)(POS*1), X2
	MOVOU 48(SRC0)(POS*1), X3

	MOVOU (SRC1)(POS*1), X4
	MOVOU 16(SRC1)(POS*1), X5
	MOVOU 32(SRC1)(POS*1), X6
	MOVOU 48(SRC1)(POS*1), X7

	PXOR X4, X0
	PXOR X5, X1
	PXOR X6, X2
	PXOR X7, X3

	LONG $0xe70f4266; WORD $0x0304             // MOVNTDQ X0, (DST)(POS*1)
	LONG $0xe70f4266; WORD $0x034c; BYTE $0x10 // MOVNTDQ X1, 16(DST)(POS*1)
	LONG $0xe70f4266; WORD $0x0354; BYTE $0x20 // MOVNTDQ X2, 32(DST)(POS*1)
	LONG $0xe70f4266; WORD $0x035c; BYTE $0x30 // MOVNTDQ X3, 48(DST)(POS*1)

	ADDQ $64, POS
	CMPQ LEN, POS
	JNE  loop64b
	SFENCE
	RET

loop_1b:
	// XOR the last unprocessed byte, then shrink LEN by one.
	MOVB  -1(SRC0)(LEN*1), TMP1
	MOVB  -1(SRC1)(LEN*1), TMP2
	XORB  TMP1, TMP2
	MOVB  TMP2, -1(DST)(LEN*1)
	SUBQ  $1, LEN
	TESTQ $7, LEN
	JNZ   loop_1b
	CMPQ  LEN, $0
	JE    ret
	TESTQ $63, LEN
	JZ    aligned

not_aligned:
	TESTQ $7, LEN
	JNE   loop_1b
	MOVQ  LEN, TMP1
	ANDQ  $63, TMP1 // bytes to peel off in 8-byte words

loop_8b:
	MOVQ -8(SRC0)(LEN*1), TMP2
	MOVQ -8(SRC1)(LEN*1), TMP3
	XORQ TMP2, TMP3
	MOVQ TMP3, -8(DST)(LEN*1)
	SUBQ $8, LEN
	SUBQ $8, TMP1
	JG   loop_8b

	CMPQ LEN, $64
	JGE  aligned
	RET

ret:
	RET
|
||||
|
||||
// func matrixSSE2small(dst []byte, src [][]byte)
//
// XORs all vectors of src together into dst. len(dst) bytes of every vector
// are read; len(src) is the number of vectors. Each inner loop loads
// vector 0 and then always folds in at least one more vector, so src must
// hold at least two vectors.
//
// The tail is processed first, working backwards from the end: single bytes
// until the remaining length is a multiple of 8, then 8-byte words until it
// is a multiple of 64. The remaining prefix is processed forwards from
// offset 0 in 64-byte steps with unaligned loads and stores.
//
// A zero-length dst returns immediately; without that check the 64-byte
// loop would store before comparing POS against LEN and never terminate.
TEXT ·matrixSSE2small(SB), NOSPLIT, $0
	MOVQ  dst_base+0(FP), DST
	MOVQ  src_base+24(FP), SRC
	MOVQ  src_len+32(FP), VECT
	MOVQ  dst_len+8(FP), LEN
	CMPQ  LEN, $0
	JE    ret
	TESTQ $63, LEN
	JNZ   not_aligned

aligned:
	MOVQ $0, POS

loop64b:
	MOVQ  VECT, TMP1
	SUBQ  $2, TMP1            // next_vect runs VECT-1 times
	MOVQ  $0, TMP2            // byte offset of the current slice header in src
	MOVQ  (SRC)(TMP2*1), TMP3 // data pointer of src[0]
	MOVQ  TMP3, TMP4
	MOVOU (TMP3)(POS*1), X0
	MOVOU 16(TMP4)(POS*1), X1
	MOVOU 32(TMP3)(POS*1), X2
	MOVOU 48(TMP4)(POS*1), X3

next_vect:
	ADDQ  $24, TMP2           // advance one slice header (24 bytes)
	MOVQ  (SRC)(TMP2*1), TMP3
	MOVQ  TMP3, TMP4
	MOVOU (TMP3)(POS*1), X4
	MOVOU 16(TMP4)(POS*1), X5
	MOVOU 32(TMP3)(POS*1), X6
	MOVOU 48(TMP4)(POS*1), X7
	PXOR  X4, X0
	PXOR  X5, X1
	PXOR  X6, X2
	PXOR  X7, X3
	SUBQ  $1, TMP1
	JGE   next_vect

	MOVOU X0, (DST)(POS*1)
	MOVOU X1, 16(DST)(POS*1)
	MOVOU X2, 32(DST)(POS*1)
	MOVOU X3, 48(DST)(POS*1)

	ADDQ $64, POS
	CMPQ LEN, POS
	JNE  loop64b
	RET

loop_1b:
	MOVQ VECT, TMP1
	MOVQ $0, TMP2
	MOVQ (SRC)(TMP2*1), TMP3
	SUBQ $2, TMP1
	MOVB -1(TMP3)(LEN*1), TMP5

next_vect_1b:
	ADDQ $24, TMP2
	MOVQ (SRC)(TMP2*1), TMP3
	MOVB -1(TMP3)(LEN*1), TMP6
	XORB TMP6, TMP5
	SUBQ $1, TMP1
	JGE  next_vect_1b

	MOVB  TMP5, -1(DST)(LEN*1)
	SUBQ  $1, LEN
	TESTQ $7, LEN
	JNZ   loop_1b

	CMPQ  LEN, $0
	JE    ret
	TESTQ $63, LEN
	JZ    aligned

not_aligned:
	TESTQ $7, LEN
	JNE   loop_1b
	MOVQ  LEN, TMP4
	ANDQ  $63, TMP4 // bytes to peel off in 8-byte words

loop_8b:
	MOVQ VECT, TMP1
	MOVQ $0, TMP2
	MOVQ (SRC)(TMP2*1), TMP3
	SUBQ $2, TMP1
	MOVQ -8(TMP3)(LEN*1), TMP5

next_vect_8b:
	ADDQ $24, TMP2
	MOVQ (SRC)(TMP2*1), TMP3
	MOVQ -8(TMP3)(LEN*1), TMP6
	XORQ TMP6, TMP5
	SUBQ $1, TMP1
	JGE  next_vect_8b

	MOVQ TMP5, -8(DST)(LEN*1)
	SUBQ $8, LEN
	SUBQ $8, TMP4
	JG   loop_8b

	CMPQ LEN, $64
	JGE  aligned
	RET

ret:
	RET
|
||||
|
||||
// func matrixSSE2big(dst []byte, src [][]byte)
//
// XORs all vectors of src together into dst. len(dst) bytes of every vector
// are read; len(src) is the number of vectors. Each inner loop loads
// vector 0 and then always folds in at least one more vector, so src must
// hold at least two vectors.
//
// The tail is processed first, working backwards from the end: single bytes
// until the remaining length is a multiple of 8, then 8-byte words until it
// is a multiple of 64. The remaining prefix is processed forwards from
// offset 0 in 64-byte steps and written with hand-encoded MOVNTDQ
// (non-temporal) stores.
//
// NOTE: MOVNTDQ raises #GP unless its memory operand is 16-byte aligned, so
// the base address of dst must be 16-byte aligned when the main loop runs.
// Non-temporal stores are weakly ordered; an SFENCE is issued after the loop
// so the written data is ordered before any later store.
//
// A zero-length dst returns immediately; without that check the 64-byte
// loop would store before comparing POS against LEN and never terminate.
TEXT ·matrixSSE2big(SB), NOSPLIT, $0
	MOVQ  dst_base+0(FP), DST
	MOVQ  src_base+24(FP), SRC
	MOVQ  src_len+32(FP), VECT
	MOVQ  dst_len+8(FP), LEN
	CMPQ  LEN, $0
	JE    ret
	TESTQ $63, LEN
	JNZ   not_aligned

aligned:
	MOVQ $0, POS

loop64b:
	MOVQ  VECT, TMP1
	SUBQ  $2, TMP1            // next_vect runs VECT-1 times
	MOVQ  $0, TMP2            // byte offset of the current slice header in src
	MOVQ  (SRC)(TMP2*1), TMP3 // data pointer of src[0]
	MOVQ  TMP3, TMP4
	MOVOU (TMP3)(POS*1), X0
	MOVOU 16(TMP4)(POS*1), X1
	MOVOU 32(TMP3)(POS*1), X2
	MOVOU 48(TMP4)(POS*1), X3

next_vect:
	ADDQ  $24, TMP2           // advance one slice header (24 bytes)
	MOVQ  (SRC)(TMP2*1), TMP3
	MOVQ  TMP3, TMP4
	MOVOU (TMP3)(POS*1), X4
	MOVOU 16(TMP4)(POS*1), X5
	MOVOU 32(TMP3)(POS*1), X6
	MOVOU 48(TMP4)(POS*1), X7
	PXOR  X4, X0
	PXOR  X5, X1
	PXOR  X6, X2
	PXOR  X7, X3
	SUBQ  $1, TMP1
	JGE   next_vect

	LONG $0xe70f4266; WORD $0x0304             // MOVNTDQ X0, (DST)(POS*1)
	LONG $0xe70f4266; WORD $0x034c; BYTE $0x10 // MOVNTDQ X1, 16(DST)(POS*1)
	LONG $0xe70f4266; WORD $0x0354; BYTE $0x20 // MOVNTDQ X2, 32(DST)(POS*1)
	LONG $0xe70f4266; WORD $0x035c; BYTE $0x30 // MOVNTDQ X3, 48(DST)(POS*1)

	ADDQ $64, POS
	CMPQ LEN, POS
	JNE  loop64b
	SFENCE
	RET

loop_1b:
	MOVQ VECT, TMP1
	MOVQ $0, TMP2
	MOVQ (SRC)(TMP2*1), TMP3
	SUBQ $2, TMP1
	MOVB -1(TMP3)(LEN*1), TMP5

next_vect_1b:
	ADDQ $24, TMP2
	MOVQ (SRC)(TMP2*1), TMP3
	MOVB -1(TMP3)(LEN*1), TMP6
	XORB TMP6, TMP5
	SUBQ $1, TMP1
	JGE  next_vect_1b

	MOVB  TMP5, -1(DST)(LEN*1)
	SUBQ  $1, LEN
	TESTQ $7, LEN
	JNZ   loop_1b

	CMPQ  LEN, $0
	JE    ret
	TESTQ $63, LEN
	JZ    aligned

not_aligned:
	TESTQ $7, LEN
	JNE   loop_1b
	MOVQ  LEN, TMP4
	ANDQ  $63, TMP4 // bytes to peel off in 8-byte words

loop_8b:
	MOVQ VECT, TMP1
	MOVQ $0, TMP2
	MOVQ (SRC)(TMP2*1), TMP3
	SUBQ $2, TMP1
	MOVQ -8(TMP3)(LEN*1), TMP5

next_vect_8b:
	ADDQ $24, TMP2
	MOVQ (SRC)(TMP2*1), TMP3
	MOVQ -8(TMP3)(LEN*1), TMP6
	XORQ TMP6, TMP5
	SUBQ $1, TMP1
	JGE  next_vect_8b

	MOVQ TMP5, -8(DST)(LEN*1)
	SUBQ $8, LEN
	SUBQ $8, TMP4
	JG   loop_8b

	CMPQ LEN, $64
	JGE  aligned
	RET

ret:
	RET
|
||||
|
||||
// func hasSSE2() bool
//
// Executes CPUID with EAX = 1 and returns bit 26 of EDX.
TEXT ·hasSSE2(SB), NOSPLIT, $0
	MOVL $1, AX        // CPUID leaf 1
	CPUID
	SHRQ $26, DX       // move bit 26 down to bit 0
	ANDQ $1, DX
	MOVB DX, ret+0(FP)
	RET
|
||||
|
49
vendor/github.com/templexxx/xor/xor.go
generated
vendored
Normal file
49
vendor/github.com/templexxx/xor/xor.go
generated
vendored
Normal file
@ -0,0 +1,49 @@
|
||||
package xor
|
||||
|
||||
// SIMD Extensions
|
||||
const (
|
||||
none = iota
|
||||
avx2
|
||||
// first introduced by Intel with the initial version of the Pentium 4 in 2001
|
||||
// so I think we can assume all amd64 has sse2
|
||||
sse2
|
||||
)
|
||||
|
||||
var extension = none
|
||||
|
||||
// Bytes : chose the shortest one as xor size
|
||||
// it's better to use it for big data ( > 64bytes )
|
||||
func Bytes(dst, src0, src1 []byte) {
|
||||
size := len(dst)
|
||||
if size > len(src0) {
|
||||
size = len(src0)
|
||||
}
|
||||
if size > len(src1) {
|
||||
size = len(src1)
|
||||
}
|
||||
xorBytes(dst, src0, src1, size)
|
||||
}
|
||||
|
||||
// BytesSameLen : all slice's length must be equal
|
||||
// cut size branch, save time for small data
|
||||
func BytesSameLen(dst, src0, src1 []byte) {
|
||||
xorSrc1(dst, src0, src1)
|
||||
}
|
||||
|
||||
// BytesSrc0 : src1 >= src0, dst >= src0
|
||||
// xor src0's len bytes
|
||||
func BytesSrc0(dst, src0, src1 []byte) {
|
||||
xorSrc0(dst, src0, src1)
|
||||
}
|
||||
|
||||
// BytesSrc1 : src0 >= src1, dst >= src1
|
||||
// xor src1's len bytes
|
||||
func BytesSrc1(dst, src0, src1 []byte) {
|
||||
xorSrc1(dst, src0, src1)
|
||||
}
|
||||
|
||||
// Matrix : all slice's length must be equal && != 0
|
||||
// len(src) must >= 2
|
||||
func Matrix(dst []byte, src [][]byte) {
|
||||
xorMatrix(dst, src)
|
||||
}
|
120
vendor/github.com/templexxx/xor/xor_amd64.go
generated
vendored
Normal file
120
vendor/github.com/templexxx/xor/xor_amd64.go
generated
vendored
Normal file
@ -0,0 +1,120 @@
|
||||
package xor
|
||||
|
||||
import (
	"unsafe"

	"github.com/templexxx/cpufeat"
)
|
||||
|
||||
func init() {
|
||||
getEXT()
|
||||
}
|
||||
|
||||
func getEXT() {
|
||||
if cpufeat.X86.HasAVX2 {
|
||||
extension = avx2
|
||||
} else {
|
||||
extension = sse2
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
func xorBytes(dst, src0, src1 []byte, size int) {
|
||||
switch extension {
|
||||
case avx2:
|
||||
bytesAVX2(dst, src0, src1, size)
|
||||
default:
|
||||
bytesSSE2(dst, src0, src1, size)
|
||||
}
|
||||
}
|
||||
|
||||
// non-temporal hint store
|
||||
const nontmp = 8 * 1024
|
||||
const avx2loopsize = 128
|
||||
|
||||
func bytesAVX2(dst, src0, src1 []byte, size int) {
|
||||
if size < avx2loopsize {
|
||||
bytesAVX2mini(dst, src0, src1, size)
|
||||
} else if size >= avx2loopsize && size <= nontmp {
|
||||
bytesAVX2small(dst, src0, src1, size)
|
||||
} else {
|
||||
bytesAVX2big(dst, src0, src1, size)
|
||||
}
|
||||
}
|
||||
|
||||
const sse2loopsize = 64
|
||||
|
||||
func bytesSSE2(dst, src0, src1 []byte, size int) {
|
||||
if size < sse2loopsize {
|
||||
bytesSSE2mini(dst, src0, src1, size)
|
||||
} else if size >= sse2loopsize && size <= nontmp {
|
||||
bytesSSE2small(dst, src0, src1, size)
|
||||
} else {
|
||||
bytesSSE2big(dst, src0, src1, size)
|
||||
}
|
||||
}
|
||||
|
||||
func xorMatrix(dst []byte, src [][]byte) {
|
||||
switch extension {
|
||||
case avx2:
|
||||
matrixAVX2(dst, src)
|
||||
default:
|
||||
matrixSSE2(dst, src)
|
||||
}
|
||||
}
|
||||
|
||||
func matrixAVX2(dst []byte, src [][]byte) {
|
||||
size := len(dst)
|
||||
if size > nontmp {
|
||||
matrixAVX2big(dst, src)
|
||||
} else {
|
||||
matrixAVX2small(dst, src)
|
||||
}
|
||||
}
|
||||
|
||||
func matrixSSE2(dst []byte, src [][]byte) {
|
||||
size := len(dst)
|
||||
if size > nontmp {
|
||||
matrixSSE2big(dst, src)
|
||||
} else {
|
||||
matrixSSE2small(dst, src)
|
||||
}
|
||||
}
|
||||
|
||||
//go:noescape
|
||||
func xorSrc0(dst, src0, src1 []byte)
|
||||
|
||||
//go:noescape
|
||||
func xorSrc1(dst, src0, src1 []byte)
|
||||
|
||||
//go:noescape
|
||||
func bytesAVX2mini(dst, src0, src1 []byte, size int)
|
||||
|
||||
//go:noescape
|
||||
func bytesAVX2big(dst, src0, src1 []byte, size int)
|
||||
|
||||
//go:noescape
|
||||
func bytesAVX2small(dst, src0, src1 []byte, size int)
|
||||
|
||||
//go:noescape
|
||||
func bytesSSE2mini(dst, src0, src1 []byte, size int)
|
||||
|
||||
//go:noescape
|
||||
func bytesSSE2small(dst, src0, src1 []byte, size int)
|
||||
|
||||
//go:noescape
|
||||
func bytesSSE2big(dst, src0, src1 []byte, size int)
|
||||
|
||||
//go:noescape
|
||||
func matrixAVX2small(dst []byte, src [][]byte)
|
||||
|
||||
//go:noescape
|
||||
func matrixAVX2big(dst []byte, src [][]byte)
|
||||
|
||||
//go:noescape
|
||||
func matrixSSE2small(dst []byte, src [][]byte)
|
||||
|
||||
//go:noescape
|
||||
func matrixSSE2big(dst []byte, src [][]byte)
|
||||
|
||||
//go:noescape
|
||||
func hasAVX2() bool
|
||||
|
||||
//go:noescape
|
||||
func hasSSE2() bool
|
19
vendor/github.com/templexxx/xor/xor_other.go
generated
vendored
Normal file
19
vendor/github.com/templexxx/xor/xor_other.go
generated
vendored
Normal file
@ -0,0 +1,19 @@
|
||||
// +build !amd64 noasm
|
||||
|
||||
package xor
|
||||
|
||||
func xorBytes(dst, src0, src1 []byte, size int) {
|
||||
bytesNoSIMD(dst, src0, src1, size)
|
||||
}
|
||||
|
||||
func xorMatrix(dst []byte, src [][]byte) {
|
||||
matrixNoSIMD(dst, src)
|
||||
}
|
||||
|
||||
func xorSrc0(dst, src0, src1 []byte) {
|
||||
bytesNoSIMD(dst, src0, src1, len(src0))
|
||||
}
|
||||
|
||||
func xorSrc1(dst, src0, src1 []byte) {
|
||||
bytesNoSIMD(dst, src0, src1, len(src1))
|
||||
}
|
201
vendor/github.com/tjfoc/gmsm/LICENSE
generated
vendored
Normal file
201
vendor/github.com/tjfoc/gmsm/LICENSE
generated
vendored
Normal file
@ -0,0 +1,201 @@
|
||||
Apache License
|
||||
Version 2.0, January 2004
|
||||
http://www.apache.org/licenses/
|
||||
|
||||
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
||||
|
||||
1. Definitions.
|
||||
|
||||
"License" shall mean the terms and conditions for use, reproduction,
|
||||
and distribution as defined by Sections 1 through 9 of this document.
|
||||
|
||||
"Licensor" shall mean the copyright owner or entity authorized by
|
||||
the copyright owner that is granting the License.
|
||||
|
||||
"Legal Entity" shall mean the union of the acting entity and all
|
||||
other entities that control, are controlled by, or are under common
|
||||
control with that entity. For the purposes of this definition,
|
||||
"control" means (i) the power, direct or indirect, to cause the
|
||||
direction or management of such entity, whether by contract or
|
||||
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
||||
outstanding shares, or (iii) beneficial ownership of such entity.
|
||||
|
||||
"You" (or "Your") shall mean an individual or Legal Entity
|
||||
exercising permissions granted by this License.
|
||||
|
||||
"Source" form shall mean the preferred form for making modifications,
|
||||
including but not limited to software source code, documentation
|
||||
source, and configuration files.
|
||||
|
||||
"Object" form shall mean any form resulting from mechanical
|
||||
transformation or translation of a Source form, including but
|
||||
not limited to compiled object code, generated documentation,
|
||||
and conversions to other media types.
|
||||
|
||||
"Work" shall mean the work of authorship, whether in Source or
|
||||
Object form, made available under the License, as indicated by a
|
||||
copyright notice that is included in or attached to the work
|
||||
(an example is provided in the Appendix below).
|
||||
|
||||
"Derivative Works" shall mean any work, whether in Source or Object
|
||||
form, that is based on (or derived from) the Work and for which the
|
||||
editorial revisions, annotations, elaborations, or other modifications
|
||||
represent, as a whole, an original work of authorship. For the purposes
|
||||
of this License, Derivative Works shall not include works that remain
|
||||
separable from, or merely link (or bind by name) to the interfaces of,
|
||||
the Work and Derivative Works thereof.
|
||||
|
||||
"Contribution" shall mean any work of authorship, including
|
||||
the original version of the Work and any modifications or additions
|
||||
to that Work or Derivative Works thereof, that is intentionally
|
||||
submitted to Licensor for inclusion in the Work by the copyright owner
|
||||
or by an individual or Legal Entity authorized to submit on behalf of
|
||||
the copyright owner. For the purposes of this definition, "submitted"
|
||||
means any form of electronic, verbal, or written communication sent
|
||||
to the Licensor or its representatives, including but not limited to
|
||||
communication on electronic mailing lists, source code control systems,
|
||||
and issue tracking systems that are managed by, or on behalf of, the
|
||||
Licensor for the purpose of discussing and improving the Work, but
|
||||
excluding communication that is conspicuously marked or otherwise
|
||||
designated in writing by the copyright owner as "Not a Contribution."
|
||||
|
||||
"Contributor" shall mean Licensor and any individual or Legal Entity
|
||||
on behalf of whom a Contribution has been received by Licensor and
|
||||
subsequently incorporated within the Work.
|
||||
|
||||
2. Grant of Copyright License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
copyright license to reproduce, prepare Derivative Works of,
|
||||
publicly display, publicly perform, sublicense, and distribute the
|
||||
Work and such Derivative Works in Source or Object form.
|
||||
|
||||
3. Grant of Patent License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
(except as stated in this section) patent license to make, have made,
|
||||
use, offer to sell, sell, import, and otherwise transfer the Work,
|
||||
where such license applies only to those patent claims licensable
|
||||
by such Contributor that are necessarily infringed by their
|
||||
Contribution(s) alone or by combination of their Contribution(s)
|
||||
with the Work to which such Contribution(s) was submitted. If You
|
||||
institute patent litigation against any entity (including a
|
||||
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
||||
or a Contribution incorporated within the Work constitutes direct
|
||||
or contributory patent infringement, then any patent licenses
|
||||
granted to You under this License for that Work shall terminate
|
||||
as of the date such litigation is filed.
|
||||
|
||||
4. Redistribution. You may reproduce and distribute copies of the
|
||||
Work or Derivative Works thereof in any medium, with or without
|
||||
modifications, and in Source or Object form, provided that You
|
||||
meet the following conditions:
|
||||
|
||||
(a) You must give any other recipients of the Work or
|
||||
Derivative Works a copy of this License; and
|
||||
|
||||
(b) You must cause any modified files to carry prominent notices
|
||||
stating that You changed the files; and
|
||||
|
||||
(c) You must retain, in the Source form of any Derivative Works
|
||||
that You distribute, all copyright, patent, trademark, and
|
||||
attribution notices from the Source form of the Work,
|
||||
excluding those notices that do not pertain to any part of
|
||||
the Derivative Works; and
|
||||
|
||||
(d) If the Work includes a "NOTICE" text file as part of its
|
||||
distribution, then any Derivative Works that You distribute must
|
||||
include a readable copy of the attribution notices contained
|
||||
within such NOTICE file, excluding those notices that do not
|
||||
pertain to any part of the Derivative Works, in at least one
|
||||
of the following places: within a NOTICE text file distributed
|
||||
as part of the Derivative Works; within the Source form or
|
||||
documentation, if provided along with the Derivative Works; or,
|
||||
within a display generated by the Derivative Works, if and
|
||||
wherever such third-party notices normally appear. The contents
|
||||
of the NOTICE file are for informational purposes only and
|
||||
do not modify the License. You may add Your own attribution
|
||||
notices within Derivative Works that You distribute, alongside
|
||||
or as an addendum to the NOTICE text from the Work, provided
|
||||
that such additional attribution notices cannot be construed
|
||||
as modifying the License.
|
||||
|
||||
You may add Your own copyright statement to Your modifications and
|
||||
may provide additional or different license terms and conditions
|
||||
for use, reproduction, or distribution of Your modifications, or
|
||||
for any such Derivative Works as a whole, provided Your use,
|
||||
reproduction, and distribution of the Work otherwise complies with
|
||||
the conditions stated in this License.
|
||||
|
||||
5. Submission of Contributions. Unless You explicitly state otherwise,
|
||||
any Contribution intentionally submitted for inclusion in the Work
|
||||
by You to the Licensor shall be under the terms and conditions of
|
||||
this License, without any additional terms or conditions.
|
||||
Notwithstanding the above, nothing herein shall supersede or modify
|
||||
the terms of any separate license agreement you may have executed
|
||||
with Licensor regarding such Contributions.
|
||||
|
||||
6. Trademarks. This License does not grant permission to use the trade
|
||||
names, trademarks, service marks, or product names of the Licensor,
|
||||
except as required for reasonable and customary use in describing the
|
||||
origin of the Work and reproducing the content of the NOTICE file.
|
||||
|
||||
7. Disclaimer of Warranty. Unless required by applicable law or
|
||||
agreed to in writing, Licensor provides the Work (and each
|
||||
Contributor provides its Contributions) on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||
implied, including, without limitation, any warranties or conditions
|
||||
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
||||
PARTICULAR PURPOSE. You are solely responsible for determining the
|
||||
appropriateness of using or redistributing the Work and assume any
|
||||
risks associated with Your exercise of permissions under this License.
|
||||
|
||||
8. Limitation of Liability. In no event and under no legal theory,
|
||||
whether in tort (including negligence), contract, or otherwise,
|
||||
unless required by applicable law (such as deliberate and grossly
|
||||
negligent acts) or agreed to in writing, shall any Contributor be
|
||||
liable to You for damages, including any direct, indirect, special,
|
||||
incidental, or consequential damages of any character arising as a
|
||||
result of this License or out of the use or inability to use the
|
||||
Work (including but not limited to damages for loss of goodwill,
|
||||
work stoppage, computer failure or malfunction, or any and all
|
||||
other commercial damages or losses), even if such Contributor
|
||||
has been advised of the possibility of such damages.
|
||||
|
||||
9. Accepting Warranty or Additional Liability. While redistributing
|
||||
the Work or Derivative Works thereof, You may choose to offer,
|
||||
and charge a fee for, acceptance of support, warranty, indemnity,
|
||||
or other liability obligations and/or rights consistent with this
|
||||
License. However, in accepting such obligations, You may act only
|
||||
on Your own behalf and on Your sole responsibility, not on behalf
|
||||
of any other Contributor, and only if You agree to indemnify,
|
||||
defend, and hold each Contributor harmless for any liability
|
||||
incurred by, or claims asserted against, such Contributor by reason
|
||||
of your accepting any such warranty or additional liability.
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
||||
APPENDIX: How to apply the Apache License to your work.
|
||||
|
||||
To apply the Apache License to your work, attach the following
|
||||
boilerplate notice, with the fields enclosed by brackets "{}"
|
||||
replaced with your own identifying information. (Don't include
|
||||
the brackets!) The text should be enclosed in the appropriate
|
||||
comment syntax for the file format. We also recommend that a
|
||||
file or class name and description of purpose be included on the
|
||||
same "printed page" as the copyright notice for easier
|
||||
identification within third-party archives.
|
||||
|
||||
Copyright {yyyy} {name of copyright owner}
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
291
vendor/github.com/tjfoc/gmsm/sm4/sm4.go
generated
vendored
Normal file
291
vendor/github.com/tjfoc/gmsm/sm4/sm4.go
generated
vendored
Normal file
@ -0,0 +1,291 @@
|
||||
/*
|
||||
Copyright Suzhou Tongji Fintech Research Institute 2017 All Rights Reserved.
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package sm4
|
||||
|
||||
import (
|
||||
"crypto/cipher"
|
||||
"crypto/rand"
|
||||
"crypto/x509"
|
||||
"encoding/pem"
|
||||
"errors"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"strconv"
|
||||
)
|
||||
|
||||
const BlockSize = 16
|
||||
|
||||
type SM4Key []byte
|
||||
|
||||
type KeySizeError int
|
||||
|
||||
// Sm4Cipher is an instance of SM4 encryption.
//
// NOTE(review): subkeys presumably holds the expanded round keys, and
// block1/block2 look like per-instance scratch buffers — confirm against
// the methods that use them.
type Sm4Cipher struct {
	subkeys, block1 []uint32
	block2          []byte
}
|
||||
|
||||
// fk holds SM4 key parameters (the four FK words).
|
||||
var fk = [4]uint32{
|
||||
0xa3b1bac6, 0x56aa3350, 0x677d9197, 0xb27022dc,
|
||||
}
|
||||
|
||||
// ck holds SM4 key parameters (the 32 CK words).
|
||||
var ck = [32]uint32{
|
||||
0x00070e15, 0x1c232a31, 0x383f464d, 0x545b6269,
|
||||
0x70777e85, 0x8c939aa1, 0xa8afb6bd, 0xc4cbd2d9,
|
||||
0xe0e7eef5, 0xfc030a11, 0x181f262d, 0x343b4249,
|
||||
0x50575e65, 0x6c737a81, 0x888f969d, 0xa4abb2b9,
|
||||
0xc0c7ced5, 0xdce3eaf1, 0xf8ff060d, 0x141b2229,
|
||||
0x30373e45, 0x4c535a61, 0x686f767d, 0x848b9299,
|
||||
0xa0a7aeb5, 0xbcc3cad1, 0xd8dfe6ed, 0xf4fb0209,
|
||||
0x10171e25, 0x2c333a41, 0x484f565d, 0x646b7279,
|
||||
}
|
||||
|
||||
// sbox is the SM4 byte substitution table (S-box).
|
||||
var sbox = [256]uint8{
|
||||
0xd6, 0x90, 0xe9, 0xfe, 0xcc, 0xe1, 0x3d, 0xb7, 0x16, 0xb6, 0x14, 0xc2, 0x28, 0xfb, 0x2c, 0x05,
|
||||
0x2b, 0x67, 0x9a, 0x76, 0x2a, 0xbe, 0x04, 0xc3, 0xaa, 0x44, 0x13, 0x26, 0x49, 0x86, 0x06, 0x99,
|
||||
0x9c, 0x42, 0x50, 0xf4, 0x91, 0xef, 0x98, 0x7a, 0x33, 0x54, 0x0b, 0x43, 0xed, 0xcf, 0xac, 0x62,
|
||||
0xe4, 0xb3, 0x1c, 0xa9, 0xc9, 0x08, 0xe8, 0x95, 0x80, 0xdf, 0x94, 0xfa, 0x75, 0x8f, 0x3f, 0xa6,
|
||||
0x47, 0x07, 0xa7, 0xfc, 0xf3, 0x73, 0x17, 0xba, 0x83, 0x59, 0x3c, 0x19, 0xe6, 0x85, 0x4f, 0xa8,
|
||||
0x68, 0x6b, 0x81, 0xb2, 0x71, 0x64, 0xda, 0x8b, 0xf8, 0xeb, 0x0f, 0x4b, 0x70, 0x56, 0x9d, 0x35,
|
||||
0x1e, 0x24, 0x0e, 0x5e, 0x63, 0x58, 0xd1, 0xa2, 0x25, 0x22, 0x7c, 0x3b, 0x01, 0x21, 0x78, 0x87,
|
||||
0xd4, 0x00, 0x46, 0x57, 0x9f, 0xd3, 0x27, 0x52, 0x4c, 0x36, 0x02, 0xe7, 0xa0, 0xc4, 0xc8, 0x9e,
|
||||
0xea, 0xbf, 0x8a, 0xd2, 0x40, 0xc7, 0x38, 0xb5, 0xa3, 0xf7, 0xf2, 0xce, 0xf9, 0x61, 0x15, 0xa1,
|
||||
0xe0, 0xae, 0x5d, 0xa4, 0x9b, 0x34, 0x1a, 0x55, 0xad, 0x93, 0x32, 0x30, 0xf5, 0x8c, 0xb1, 0xe3,
|
||||
0x1d, 0xf6, 0xe2, 0x2e, 0x82, 0x66, 0xca, 0x60, 0xc0, 0x29, 0x23, 0xab, 0x0d, 0x53, 0x4e, 0x6f,
|
||||
0xd5, 0xdb, 0x37, 0x45, 0xde, 0xfd, 0x8e, 0x2f, 0x03, 0xff, 0x6a, 0x72, 0x6d, 0x6c, 0x5b, 0x51,
|
||||
0x8d, 0x1b, 0xaf, 0x92, 0xbb, 0xdd, 0xbc, 0x7f, 0x11, 0xd9, 0x5c, 0x41, 0x1f, 0x10, 0x5a, 0xd8,
|
||||
0x0a, 0xc1, 0x31, 0x88, 0xa5, 0xcd, 0x7b, 0xbd, 0x2d, 0x74, 0xd0, 0x12, 0xb8, 0xe5, 0xb4, 0xb0,
|
||||
0x89, 0x69, 0x97, 0x4a, 0x0c, 0x96, 0x77, 0x7e, 0x65, 0xb9, 0xf1, 0x09, 0xc5, 0x6e, 0xc6, 0x84,
|
||||
0x18, 0xf0, 0x7d, 0xec, 0x3a, 0xdc, 0x4d, 0x20, 0x79, 0xee, 0x5f, 0x3e, 0xd7, 0xcb, 0x39, 0x48,
|
||||
}
|
||||
|
||||
// rl rotates x left by i bits. The count is reduced modulo 32 first, so a
// count that is a multiple of 32 returns x unchanged.
func rl(x uint32, i uint8) uint32 {
	n := i % 32
	return x<<n | x>>(32-n)
}
|
||||
|
||||
// l0 is the linear transform used on the key-schedule path (see feistel0):
// b XOR (b <<< 13) XOR (b <<< 23).
func l0(b uint32) uint32 {
	r13 := rl(b, 13)
	r23 := rl(b, 23)
	return b ^ r13 ^ r23
}
|
||||
|
||||
// l1 is the linear transform used on the data path (see cryptBlock):
// b XOR (b <<< 2) XOR (b <<< 10) XOR (b <<< 18) XOR (b <<< 24).
func l1(b uint32) uint32 {
	acc := b ^ rl(b, 2)
	acc ^= rl(b, 10)
	acc ^= rl(b, 18)
	return acc ^ rl(b, 24)
}
|
||||
|
||||
// feistel0 computes one key-schedule round: x1, x2, x3 and the round constant
// rk are XORed together, passed through p and then l0, and folded into x0.
func feistel0(x0, x1, x2, x3, rk uint32) uint32 {
	mixed := x1 ^ x2 ^ x3 ^ rk
	return x0 ^ l0(p(mixed))
}
|
||||
|
||||
// feistel1 computes one data-path round: x1, x2, x3 and the round key rk are
// XORed together, passed through p and then l1, and folded into x0.
func feistel1(x0, x1, x2, x3, rk uint32) uint32 {
	mixed := x1 ^ x2 ^ x3 ^ rk
	return x0 ^ l1(p(mixed))
}
|
||||
|
||||
//非线性变换τ(.)
|
||||
func p(a uint32) uint32 {
|
||||
return (uint32(sbox[a>>24]) << 24) ^ (uint32(sbox[(a>>16)&0xff]) << 16) ^ (uint32(sbox[(a>>8)&0xff]) << 8) ^ uint32(sbox[(a)&0xff])
|
||||
}
|
||||
|
||||
/*
|
||||
func permuteInitialBlock(block []byte) []uint32 {
|
||||
b := make([]uint32, 4, 4)
|
||||
for i := 0; i < 4; i++ {
|
||||
b[i] = (uint32(block[i*4]) << 24) | (uint32(block[i*4+1]) << 16) |
|
||||
(uint32(block[i*4+2]) << 8) | (uint32(block[i*4+3]))
|
||||
}
|
||||
return b
|
||||
}
|
||||
|
||||
func permuteFinalBlock(block []uint32) []byte {
|
||||
b := make([]byte, 16, 16)
|
||||
for i := 0; i < 4; i++ {
|
||||
b[i*4] = uint8(block[i] >> 24)
|
||||
b[i*4+1] = uint8(block[i] >> 16)
|
||||
b[i*4+2] = uint8(block[i] >> 8)
|
||||
b[i*4+3] = uint8(block[i])
|
||||
}
|
||||
return b
|
||||
}
|
||||
|
||||
func cryptBlock(subkeys []uint32, dst, src []byte, decrypt bool) {
|
||||
var tm uint32
|
||||
b := permuteInitialBlock(src)
|
||||
for i := 0; i < 32; i++ {
|
||||
if decrypt {
|
||||
tm = feistel1(b[0], b[1], b[2], b[3], subkeys[31-i])
|
||||
} else {
|
||||
tm = feistel1(b[0], b[1], b[2], b[3], subkeys[i])
|
||||
}
|
||||
b[0], b[1], b[2], b[3] = b[1], b[2], b[3], tm
|
||||
}
|
||||
b[0], b[1], b[2], b[3] = b[3], b[2], b[1], b[0]
|
||||
copy(dst, permuteFinalBlock(b))
|
||||
}
|
||||
*/
|
||||
|
||||
// permuteInitialBlock loads the first 16 bytes of block into b[0..3] as four
// big-endian 32-bit words. It indexes both slices directly, so b needs at
// least 4 elements and block at least 16 bytes.
func permuteInitialBlock(b []uint32, block []byte) {
	for w, off := 0, 0; w < 4; w, off = w+1, off+4 {
		hi := uint32(block[off])<<24 | uint32(block[off+1])<<16
		lo := uint32(block[off+2])<<8 | uint32(block[off+3])
		b[w] = hi | lo
	}
}
|
||||
|
||||
// permuteFinalBlock stores block[0..3] into b[0..15] as big-endian bytes, the
// inverse of permuteInitialBlock. It indexes both slices directly, so b needs
// at least 16 bytes and block at least 4 elements.
func permuteFinalBlock(b []byte, block []uint32) {
	for w := 0; w < 4; w++ {
		word, off := block[w], 4*w
		b[off] = byte(word >> 24)
		b[off+1] = byte(word >> 16)
		b[off+2] = byte(word >> 8)
		b[off+3] = byte(word)
	}
}
|
||||
func cryptBlock(subkeys []uint32, b []uint32, r []byte, dst, src []byte, decrypt bool) {
|
||||
var tm uint32
|
||||
|
||||
permuteInitialBlock(b, src)
|
||||
for i := 0; i < 32; i++ {
|
||||
if decrypt {
|
||||
tm = b[0] ^ l1(p(b[1]^b[2]^b[3]^subkeys[31 - i]))
|
||||
// tm = feistel1(b[0], b[1], b[2], b[3], subkeys[31-i])
|
||||
} else {
|
||||
tm = b[0] ^ l1(p(b[1]^b[2]^b[3]^subkeys[i]))
|
||||
// tm = feistel1(b[0], b[1], b[2], b[3], subkeys[i])
|
||||
}
|
||||
b[0], b[1], b[2], b[3] = b[1], b[2], b[3], tm
|
||||
}
|
||||
b[0], b[1], b[2], b[3] = b[3], b[2], b[1], b[0]
|
||||
permuteFinalBlock(r, b)
|
||||
copy(dst, r)
|
||||
}
|
||||
|
||||
func generateSubKeys(key []byte) []uint32 {
|
||||
subkeys := make([]uint32, 32)
|
||||
b := make([]uint32, 4)
|
||||
// b := permuteInitialBlock(key)
|
||||
permuteInitialBlock(b, key)
|
||||
b[0] ^= fk[0]
|
||||
b[1] ^= fk[1]
|
||||
b[2] ^= fk[2]
|
||||
b[3] ^= fk[3]
|
||||
for i := 0; i < 32; i++ {
|
||||
subkeys[i] = feistel0(b[0], b[1], b[2], b[3], ck[i])
|
||||
b[0], b[1], b[2], b[3] = b[1], b[2], b[3], subkeys[i]
|
||||
}
|
||||
return subkeys
|
||||
}
|
||||
|
||||
func EncryptBlock(key SM4Key, dst, src []byte) {
|
||||
subkeys := generateSubKeys(key)
|
||||
cryptBlock(subkeys, make([]uint32, 4), make([]byte, 16), dst, src, false)
|
||||
}
|
||||
|
||||
func DecryptBlock(key SM4Key, dst, src []byte) {
|
||||
subkeys := generateSubKeys(key)
|
||||
cryptBlock(subkeys, make([]uint32, 4), make([]byte, 16), dst, src, true)
|
||||
}
|
||||
|
||||
func ReadKeyFromMem(data []byte, pwd []byte) (SM4Key, error) {
|
||||
block, _ := pem.Decode(data)
|
||||
if x509.IsEncryptedPEMBlock(block) {
|
||||
if block.Type != "SM4 ENCRYPTED KEY" {
|
||||
return nil, errors.New("SM4: unknown type")
|
||||
}
|
||||
if pwd == nil {
|
||||
return nil, errors.New("SM4: need passwd")
|
||||
}
|
||||
data, err := x509.DecryptPEMBlock(block, pwd)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return data, nil
|
||||
}
|
||||
if block.Type != "SM4 KEY" {
|
||||
return nil, errors.New("SM4: unknown type")
|
||||
}
|
||||
return block.Bytes, nil
|
||||
}
|
||||
|
||||
func ReadKeyFromPem(FileName string, pwd []byte) (SM4Key, error) {
|
||||
data, err := ioutil.ReadFile(FileName)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return ReadKeyFromMem(data, pwd)
|
||||
}
|
||||
|
||||
func WriteKeytoMem(key SM4Key, pwd []byte) ([]byte, error) {
|
||||
if pwd != nil {
|
||||
block, err := x509.EncryptPEMBlock(rand.Reader,
|
||||
"SM4 ENCRYPTED KEY", key, pwd, x509.PEMCipherAES256)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return pem.EncodeToMemory(block), nil
|
||||
} else {
|
||||
block := &pem.Block{
|
||||
Type: "SM4 KEY",
|
||||
Bytes: key,
|
||||
}
|
||||
return pem.EncodeToMemory(block), nil
|
||||
}
|
||||
}
|
||||
|
||||
func WriteKeyToPem(FileName string, key SM4Key, pwd []byte) (bool, error) {
|
||||
var block *pem.Block
|
||||
|
||||
if pwd != nil {
|
||||
var err error
|
||||
block, err = x509.EncryptPEMBlock(rand.Reader,
|
||||
"SM4 ENCRYPTED KEY", key, pwd, x509.PEMCipherAES256)
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
} else {
|
||||
block = &pem.Block{
|
||||
Type: "SM4 KEY",
|
||||
Bytes: key,
|
||||
}
|
||||
}
|
||||
file, err := os.Create(FileName)
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
defer file.Close()
|
||||
err = pem.Encode(file, block)
|
||||
if err != nil {
|
||||
return false, nil
|
||||
}
|
||||
return true, nil
|
||||
}
|
||||
|
||||
func (k KeySizeError) Error() string {
|
||||
return "SM4: invalid key size " + strconv.Itoa(int(k))
|
||||
}
|
||||
|
||||
// NewCipher creates and returns a new cipher.Block.
|
||||
func NewCipher(key []byte) (cipher.Block, error) {
|
||||
if len(key) != BlockSize {
|
||||
return nil, KeySizeError(len(key))
|
||||
}
|
||||
c := new(Sm4Cipher)
|
||||
c.subkeys = generateSubKeys(key)
|
||||
c.block1 = make([]uint32, 4)
|
||||
c.block2 = make([]byte, 16)
|
||||
return c, nil
|
||||
}
|
||||
|
||||
func (c *Sm4Cipher) BlockSize() int {
|
||||
return BlockSize
|
||||
}
|
||||
|
||||
func (c *Sm4Cipher) Encrypt(dst, src []byte) {
|
||||
cryptBlock(c.subkeys, c.block1, c.block2, dst, src, false)
|
||||
}
|
||||
|
||||
func (c *Sm4Cipher) Decrypt(dst, src []byte) {
|
||||
cryptBlock(c.subkeys, c.block1, c.block2, dst, src, true)
|
||||
}
|
Loading…
Reference in New Issue
Block a user