Add vendor folder to git

This commit is contained in:
Lucas Käldström 2017-06-26 19:23:05 +03:00
parent 66cf5eaafb
commit 183585f56f
No known key found for this signature in database
GPG key ID: 600FEFBBD0D40D21
6916 changed files with 2629581 additions and 1 deletions

View file

@ -0,0 +1 @@
benchmark

View file

@ -0,0 +1,16 @@
// Copyright 2016 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Package cmd implements individual benchmark commands for the benchmark utility.
package cmd

View file

@ -0,0 +1,85 @@
// Copyright 2016 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package cmd
import (
"fmt"
"time"
v3 "github.com/coreos/etcd/clientv3"
"github.com/coreos/etcd/pkg/report"
"github.com/spf13/cobra"
"golang.org/x/net/context"
"gopkg.in/cheggaaa/pb.v1"
)
// leaseKeepaliveCmd represents the lease-keepalive benchmark command; it is
// executed by leaseKeepaliveFunc.
var leaseKeepaliveCmd = &cobra.Command{
Use: "lease-keepalive",
Short: "Benchmark lease keepalive",
Run: leaseKeepaliveFunc,
}
var (
// leaseKeepaliveTotal is the total number of keepalive requests to issue
// (bound to the --total flag of lease-keepalive).
leaseKeepaliveTotal int
)
// init attaches the lease-keepalive command to RootCmd and binds its flags.
func init() {
	RootCmd.AddCommand(leaseKeepaliveCmd)

	flags := leaseKeepaliveCmd.Flags()
	flags.IntVar(&leaseKeepaliveTotal, "total", 10000, "Total number of lease keepalive requests")
}
// leaseKeepaliveFunc runs the lease-keepalive benchmark.
//
// Every client grants one lease and then issues a KeepAliveOnce call for each
// token it receives from the shared requests channel. A producer goroutine
// feeds exactly leaseKeepaliveTotal tokens and then closes the channel. Each
// call's timing and error are sent to the report, which is printed once all
// goroutines have finished.
func leaseKeepaliveFunc(cmd *cobra.Command, args []string) {
	requests := make(chan struct{})
	clients := mustCreateClients(totalClients, totalConns)

	bar = pb.New(leaseKeepaliveTotal)
	bar.Format("Bom !")
	bar.Start()

	r := newReport()

	// worker grants a lease on its client and keeps it alive once per token.
	worker := func(lessor v3.Lease) {
		defer wg.Done()
		granted, err := lessor.Grant(context.Background(), 100)
		if err != nil {
			panic(err)
		}
		for range requests {
			begin := time.Now()
			_, kaErr := lessor.KeepAliveOnce(context.TODO(), granted.ID)
			r.Results() <- report.Result{Err: kaErr, Start: begin, End: time.Now()}
			bar.Increment()
		}
	}
	for _, client := range clients {
		wg.Add(1)
		go worker(client)
	}

	// The producer is tracked by wg as well, so wg.Wait also covers it.
	wg.Add(1)
	go func() {
		defer wg.Done()
		for sent := 0; sent < leaseKeepaliveTotal; sent++ {
			requests <- struct{}{}
		}
		close(requests)
	}()

	rc := r.Run()
	wg.Wait()
	close(r.Results())
	bar.Finish()
	fmt.Printf("%s", <-rc)
}

View file

@ -0,0 +1,126 @@
// Copyright 2015 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package cmd
import (
"crypto/rand"
"fmt"
"os"
"runtime/pprof"
"time"
"github.com/coreos/etcd/lease"
"github.com/coreos/etcd/pkg/report"
"github.com/spf13/cobra"
)
// mvccPutCmd represents a storage put performance benchmarking tool
var mvccPutCmd = &cobra.Command{
Use: "put",
Short: "Benchmark put performance of storage",
Run: mvccPutFunc,
}
var (
// totalNrKeys is the number of keys to put (--total).
totalNrKeys int
// storageKeySize is the size of each generated key in bytes (--key-size).
storageKeySize int
// valueSize is the size of each generated value in bytes (--value-size).
valueSize int
// txn selects whether each put is wrapped in a transaction (--txn).
txn bool
)
// init attaches the "put" subcommand to mvccCmd and binds its flags,
// including the pprof output paths.
func init() {
	mvccCmd.AddCommand(mvccPutCmd)

	flags := mvccPutCmd.Flags()
	flags.IntVar(&totalNrKeys, "total", 100, "a total number of keys to put")
	flags.IntVar(&storageKeySize, "key-size", 64, "a size of key (Byte)")
	flags.IntVar(&valueSize, "value-size", 64, "a size of value (Byte)")
	flags.BoolVar(&txn, "txn", false, "put a key in transaction or not")

	// TODO: after the PR https://github.com/spf13/cobra/pull/220 is merged, the below pprof related flags should be moved to RootCmd
	flags.StringVar(&cpuProfPath, "cpuprofile", "", "the path of file for storing cpu profile result")
	flags.StringVar(&memProfPath, "memprofile", "", "the path of file for storing heap profile result")
}
// createBytesSlice returns sliceN byte slices, each bytesN bytes long and
// filled from crypto/rand. It panics if the random source returns an error.
func createBytesSlice(bytesN, sliceN int) [][]byte {
	out := make([][]byte, 0, sliceN)
	for len(out) < sliceN {
		buf := make([]byte, bytesN)
		if _, err := rand.Read(buf); err != nil {
			panic(err)
		}
		out = append(out, buf)
	}
	return out
}
// mvccPutFunc runs the MVCC put benchmark.
//
// It optionally starts CPU profiling (--cpuprofile) and schedules a heap
// profile dump (--memprofile), generates totalNrKeys random keys and values,
// and then puts each pair into the package-level store s, either directly or
// inside a transaction when --txn is set. The timing of every put is sent to
// the report, which is printed at the end.
//
// The profile files are closed once their contents have been written. Note
// that the os.Exit calls on error paths skip deferred calls, so no profile is
// flushed in those cases.
func mvccPutFunc(cmd *cobra.Command, args []string) {
	if cpuProfPath != "" {
		f, err := os.Create(cpuProfPath)
		if err != nil {
			fmt.Fprintln(os.Stderr, "Failed to create a file for storing cpu profile result: ", err)
			os.Exit(1)
		}
		// Deferred calls run last-in-first-out: the profile is stopped (and
		// flushed) before the file is closed.
		defer f.Close()

		err = pprof.StartCPUProfile(f)
		if err != nil {
			fmt.Fprintln(os.Stderr, "Failed to start cpu profile: ", err)
			os.Exit(1)
		}
		defer pprof.StopCPUProfile()
	}

	if memProfPath != "" {
		f, err := os.Create(memProfPath)
		if err != nil {
			fmt.Fprintln(os.Stderr, "Failed to create a file for storing heap profile result: ", err)
			os.Exit(1)
		}

		defer func() {
			err := pprof.WriteHeapProfile(f)
			if err != nil {
				fmt.Fprintln(os.Stderr, "Failed to write heap profile result: ", err)
				// can do nothing for handling the error
			}
			if err := f.Close(); err != nil {
				fmt.Fprintln(os.Stderr, "Failed to close heap profile file: ", err)
			}
		}()
	}

	keys := createBytesSlice(storageKeySize, totalNrKeys)
	vals := createBytesSlice(valueSize, totalNrKeys)

	r := newReport()
	rrc := r.Results()

	rc := r.Run()
	for i := 0; i < totalNrKeys; i++ {
		st := time.Now()
		if txn {
			id := s.TxnBegin()
			if _, err := s.TxnPut(id, keys[i], vals[i], lease.NoLease); err != nil {
				fmt.Fprintln(os.Stderr, "txn put error:", err)
				os.Exit(1)
			}
			s.TxnEnd(id)
		} else {
			s.Put(keys[i], vals[i], lease.NoLease)
		}
		rrc <- report.Result{Start: st, End: time.Now()}
	}

	close(r.Results())
	fmt.Printf("%s", <-rc)
}

View file

@ -0,0 +1,59 @@
// Copyright 2015 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package cmd
import (
"os"
"time"
"github.com/coreos/etcd/lease"
"github.com/coreos/etcd/mvcc"
"github.com/coreos/etcd/mvcc/backend"
"github.com/spf13/cobra"
)
var (
// batchInterval is the backend batching interval (--batch-interval; the flag
// help text describes the unit as milliseconds).
batchInterval int
// batchLimit is the limit of batched transactions (--batch-limit).
batchLimit int
// s is the MVCC store under benchmark; it is assigned by initMVCC.
s mvcc.KV
)
// initMVCC creates the "mvcc-bench" backend and assigns a new MVCC store built
// on it to the package-level s. The backend file is unlinked right away; the
// store keeps working through the already-open file descriptor.
//
// batchInterval is a plain integer that the --batch-interval flag documents
// as milliseconds, so it is scaled by time.Millisecond here. A bare
// time.Duration(batchInterval) would be interpreted as nanoseconds.
func initMVCC() {
	be := backend.New("mvcc-bench", time.Duration(batchInterval)*time.Millisecond, batchLimit)
	s = mvcc.NewStore(be, &lease.FakeLessor{}, nil)
	os.Remove("mvcc-bench") // boltDB has an opened fd, so removing the file is ok
}
// mvccCmd represents the MVCC storage benchmarking tools.
// It has no Run function of its own; mvccPreRun is installed as
// PersistentPreRun.
var mvccCmd = &cobra.Command{
Use: "mvcc",
Short: "Benchmark mvcc",
Long: `storage subcommand is a set of various benchmark tools for MVCC storage subsystem of etcd.
Actual benchmarks are implemented as its subcommands.`,
PersistentPreRun: mvccPreRun,
}
// init attaches the mvcc command to RootCmd and binds the backend batching
// flags as persistent flags.
func init() {
	RootCmd.AddCommand(mvccCmd)

	pflags := mvccCmd.PersistentFlags()
	pflags.IntVar(&batchInterval, "batch-interval", 100, "Interval of batching (milliseconds)")
	pflags.IntVar(&batchLimit, "batch-limit", 10000, "A limit of batched transaction")
}
// mvccPreRun is installed as mvccCmd's PersistentPreRun hook. It calls
// initMVCC to set up the store; cmd and args are unused.
func mvccPreRun(cmd *cobra.Command, args []string) {
initMVCC()
}

View file

@ -0,0 +1,160 @@
// Copyright 2015 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package cmd
import (
"encoding/binary"
"fmt"
"math"
"math/rand"
"os"
"time"
v3 "github.com/coreos/etcd/clientv3"
"github.com/coreos/etcd/pkg/report"
"github.com/spf13/cobra"
"golang.org/x/net/context"
"golang.org/x/time/rate"
"gopkg.in/cheggaaa/pb.v1"
)
// putCmd represents the put command
var putCmd = &cobra.Command{
Use: "put",
Short: "Benchmark put",
Run: putFunc,
}
var (
// keySize is the byte length of each key buffer (--key-size).
keySize int
// valSize is the byte length of the put value (--val-size).
valSize int
// putTotal is the total number of put requests (--total).
putTotal int
// putRate is the maximum puts per second; 0 means no limit (--rate).
putRate int
// keySpaceSize bounds the number of distinct keys (--key-space-size).
keySpaceSize int
// seqKeys selects sequential instead of random keys (--sequential-keys).
seqKeys bool
// compactInterval is the period between compactions; 0 disables them
// (--compact-interval).
compactInterval time.Duration
// compactIndexDelta is how far behind the current revision to compact
// (--compact-index-delta).
compactIndexDelta int64
)
// init attaches the put command to RootCmd and binds its flags.
func init() {
	RootCmd.AddCommand(putCmd)

	flags := putCmd.Flags()
	flags.IntVar(&keySize, "key-size", 8, "Key size of put request")
	flags.IntVar(&valSize, "val-size", 8, "Value size of put request")
	flags.IntVar(&putRate, "rate", 0, "Maximum puts per second (0 is no limit)")

	flags.IntVar(&putTotal, "total", 10000, "Total number of put requests")
	flags.IntVar(&keySpaceSize, "key-space-size", 1, "Maximum possible keys")
	flags.BoolVar(&seqKeys, "sequential-keys", false, "Use sequential keys")
	flags.DurationVar(&compactInterval, "compact-interval", 0, `Interval to compact database (do not duplicate this with etcd's 'auto-compaction-retention' flag) (e.g. --compact-interval=5m compacts every 5-minute)`)
	flags.Int64Var(&compactIndexDelta, "compact-index-delta", 1000, "Delta between current revision and compact revision (e.g. current revision 10000, compact at 9000)")
}
// putFunc runs the put benchmark.
//
// A producer goroutine generates putTotal put operations whose keys are
// varint-encoded indices in [0, keySpaceSize), sequential or random depending
// on --sequential-keys. One worker per client executes them, rate-limited to
// putRate requests per second. When --compact-interval is positive, a
// background goroutine periodically compacts the store via compactKV. The
// report is printed once all workers have drained the request channel.
//
// The flags are validated up front: binary.PutVarint panics when the key
// buffer is too small for the value being encoded, so a --key-size that
// cannot hold the largest key index is rejected with an error message instead
// of crashing part-way through the run.
func putFunc(cmd *cobra.Command, args []string) {
	if keySpaceSize <= 0 {
		fmt.Fprintf(os.Stderr, "expected positive --key-space-size, got (%v)\n", keySpaceSize)
		os.Exit(1)
	}
	// The largest index ever encoded is keySpaceSize-1, and the varint length
	// grows monotonically for non-negative values, so its encoded length is
	// the minimum usable key size.
	scratch := make([]byte, binary.MaxVarintLen64)
	if need := binary.PutVarint(scratch, int64(keySpaceSize-1)); keySize < need {
		fmt.Fprintf(os.Stderr, "expected --key-size of at least %d for --key-space-size %d, got (%v)\n", need, keySpaceSize, keySize)
		os.Exit(1)
	}

	requests := make(chan v3.Op, totalClients)
	if putRate == 0 {
		// 0 means "no limit".
		putRate = math.MaxInt32
	}
	limit := rate.NewLimiter(rate.Limit(putRate), 1)
	clients := mustCreateClients(totalClients, totalConns)
	k, v := make([]byte, keySize), string(mustRandBytes(valSize))

	bar = pb.New(putTotal)
	bar.Format("Bom !")
	bar.Start()

	r := newReport()
	for i := range clients {
		wg.Add(1)
		go func(c *v3.Client) {
			defer wg.Done()
			for op := range requests {
				limit.Wait(context.Background())

				st := time.Now()
				_, err := c.Do(context.Background(), op)
				r.Results() <- report.Result{Err: err, Start: st, End: time.Now()}
				bar.Increment()
			}
		}(clients[i])
	}

	go func() {
		for i := 0; i < putTotal; i++ {
			// k is reused across iterations; string(k) copies it into the Op.
			if seqKeys {
				binary.PutVarint(k, int64(i%keySpaceSize))
			} else {
				binary.PutVarint(k, int64(rand.Intn(keySpaceSize)))
			}
			requests <- v3.OpPut(string(k), v)
		}
		close(requests)
	}()

	if compactInterval > 0 {
		// Runs until the process exits.
		go func() {
			for {
				time.Sleep(compactInterval)
				compactKV(clients)
			}
		}()
	}

	rc := r.Run()
	wg.Wait()
	close(r.Results())
	bar.Finish()
	fmt.Println(<-rc)
}
// compactKV compacts the store through the first client in clients.
//
// It reads the current revision from the header of a Get on key "foo" and
// then compacts at that revision minus compactIndexDelta, clamped at 0. Each
// request uses its own 5-second timeout. It panics if either request fails
// and does nothing when clients is empty.
func compactKV(clients []*v3.Client) {
	if len(clients) == 0 {
		return
	}
	c := clients[0]

	getCtx, getCancel := context.WithTimeout(context.Background(), 5*time.Second)
	resp, err := c.KV.Get(getCtx, "foo")
	getCancel()
	if err != nil {
		panic(err)
	}

	target := max(0, resp.Header.Revision-compactIndexDelta)

	compactCtx, compactCancel := context.WithTimeout(context.Background(), 5*time.Second)
	_, err = c.KV.Compact(compactCtx, target)
	compactCancel()
	if err != nil {
		panic(err)
	}
}
// max returns the larger of n1 and n2.
func max(n1, n2 int64) int64 {
	if n2 >= n1 {
		return n2
	}
	return n1
}

View file

@ -0,0 +1,119 @@
// Copyright 2015 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package cmd
import (
"fmt"
"math"
"os"
"time"
v3 "github.com/coreos/etcd/clientv3"
"github.com/coreos/etcd/pkg/report"
"github.com/spf13/cobra"
"golang.org/x/net/context"
"golang.org/x/time/rate"
"gopkg.in/cheggaaa/pb.v1"
)
// rangeCmd represents the range command
var rangeCmd = &cobra.Command{
Use: "range key [end-range]",
Short: "Benchmark range",
Run: rangeFunc,
}
var (
// rangeRate is the maximum range requests per second; 0 means no limit
// (--rate).
rangeRate int
// rangeTotal is the total number of range requests (--total).
rangeTotal int
// rangeConsistency is "l" for linearizable or "s" for serializable reads
// (--consistency).
rangeConsistency string
)
// init attaches the range command to RootCmd and binds its flags.
func init() {
	RootCmd.AddCommand(rangeCmd)

	flags := rangeCmd.Flags()
	flags.IntVar(&rangeRate, "rate", 0, "Maximum range requests per second (0 is no limit)")
	flags.IntVar(&rangeTotal, "total", 10000, "Total number of range requests")
	flags.StringVar(&rangeConsistency, "consistency", "l", "Linearizable(l) or Serializable(s)")
}
// rangeFunc runs the range benchmark.
//
// args holds the key and an optional range end. rangeTotal identical Get
// operations (serializable when --consistency=s) are queued and executed by
// one worker per client, rate-limited to rangeRate requests per second. The
// report is printed once every request has completed.
//
// On bad arguments or an unknown consistency value the command usage is shown
// and the process exits with status 1.
func rangeFunc(cmd *cobra.Command, args []string) {
	// cmd.Usage writes the usage text itself and returns only an error, so
	// the return value is reported only when it is non-nil; printing it
	// unconditionally would emit a stray "<nil>".
	usageAndExit := func() {
		if err := cmd.Usage(); err != nil {
			fmt.Fprintln(os.Stderr, err)
		}
		os.Exit(1)
	}

	if len(args) == 0 || len(args) > 2 {
		usageAndExit()
	}

	k := args[0]
	end := ""
	if len(args) == 2 {
		end = args[1]
	}

	switch rangeConsistency {
	case "l":
		fmt.Println("bench with linearizable range")
	case "s":
		fmt.Println("bench with serializable range")
	default:
		usageAndExit()
	}

	if rangeRate == 0 {
		// 0 means "no limit".
		rangeRate = math.MaxInt32
	}
	limit := rate.NewLimiter(rate.Limit(rangeRate), 1)

	requests := make(chan v3.Op, totalClients)
	clients := mustCreateClients(totalClients, totalConns)

	bar = pb.New(rangeTotal)
	bar.Format("Bom !")
	bar.Start()

	r := newReport()
	for i := range clients {
		wg.Add(1)
		go func(c *v3.Client) {
			defer wg.Done()
			for op := range requests {
				limit.Wait(context.Background())

				st := time.Now()
				_, err := c.Do(context.Background(), op)
				r.Results() <- report.Result{Err: err, Start: st, End: time.Now()}
				bar.Increment()
			}
		}(clients[i])
	}

	// Every request is the same operation, so it is built once up front.
	opts := []v3.OpOption{v3.WithRange(end)}
	if rangeConsistency == "s" {
		opts = append(opts, v3.WithSerializable())
	}
	op := v3.OpGet(k, opts...)

	go func() {
		for i := 0; i < rangeTotal; i++ {
			requests <- op
		}
		close(requests)
	}()

	rc := r.Run()
	wg.Wait()
	close(r.Results())
	bar.Finish()
	fmt.Printf("%s", <-rc)
}

View file

@ -0,0 +1,70 @@
// Copyright 2015 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package cmd
import (
"sync"
"time"
"github.com/coreos/etcd/pkg/transport"
"github.com/spf13/cobra"
"gopkg.in/cheggaaa/pb.v1"
)
// RootCmd represents the base command when called without any subcommands.
var RootCmd = &cobra.Command{
Use: "benchmark",
Short: "A low-level benchmark tool for etcd3",
Long: `benchmark is a low-level benchmark tool for etcd3.
It uses gRPC client directly and does not depend on
etcd client library.
`,
}
var (
// endpoints lists the gRPC endpoints to connect to (--endpoints).
endpoints []string
// totalConns is the number of gRPC connections to open (--conns).
totalConns uint
// totalClients is the number of gRPC clients (--clients).
totalClients uint
// precise enables full floating point precision (--precise).
precise bool
// sample enables sampling of requests for every second (--sample).
sample bool
// bar is the progress bar shared by the benchmark commands.
bar *pb.ProgressBar
// wg is the wait group shared by the benchmark commands' goroutines.
wg sync.WaitGroup
// tls holds the client TLS settings (--cert, --key, --cacert).
tls transport.TLSInfo
// cpuProfPath is the output path for a CPU profile.
cpuProfPath string
// memProfPath is the output path for a heap profile.
memProfPath string
// user is the credential in username:password format (--user).
user string
// dialTimeout is the dial timeout for client connections (--dial-timeout).
dialTimeout time.Duration
)
// init binds the persistent flags shared by every benchmark subcommand.
func init() {
	pflags := RootCmd.PersistentFlags()

	pflags.StringSliceVar(&endpoints, "endpoints", []string{"127.0.0.1:2379"}, "gRPC endpoints")
	pflags.UintVar(&totalConns, "conns", 1, "Total number of gRPC connections")
	pflags.UintVar(&totalClients, "clients", 1, "Total number of gRPC clients")

	pflags.BoolVar(&precise, "precise", false, "use full floating point precision")
	pflags.BoolVar(&sample, "sample", false, "'true' to sample requests for every second")

	// TLS settings.
	pflags.StringVar(&tls.CertFile, "cert", "", "identify HTTPS client using this SSL certificate file")
	pflags.StringVar(&tls.KeyFile, "key", "", "identify HTTPS client using this SSL key file")
	pflags.StringVar(&tls.CAFile, "cacert", "", "verify certificates of HTTPS-enabled servers using this CA bundle")

	pflags.StringVar(&user, "user", "", "specify username and password in username:password format")
	pflags.DurationVar(&dialTimeout, "dial-timeout", 0, "dial timeout for client connections")
}

View file

@ -0,0 +1,165 @@
// Copyright 2016 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package cmd
import (
"encoding/binary"
"fmt"
"math/rand"
"os"
"time"
v3 "github.com/coreos/etcd/clientv3"
v3sync "github.com/coreos/etcd/clientv3/concurrency"
"github.com/coreos/etcd/pkg/report"
"github.com/spf13/cobra"
"golang.org/x/net/context"
"gopkg.in/cheggaaa/pb.v1"
)
// stmCmd represents the STM benchmark command
var stmCmd = &cobra.Command{
Use: "stm",
Short: "Benchmark STM",
Run: stmFunc,
}
// stmApply is the body of one STM transaction, as queued for the workers.
type stmApply func(v3sync.STM) error
var (
// stmIsolation selects the isolation level: "c", "r" or "s" (--isolation).
stmIsolation string
// stmTotal is the total number of STM transactions (--total).
stmTotal int
// stmKeysPerTxn is the number of keys accessed per transaction
// (--keys-per-txn).
stmKeysPerTxn int
// stmKeyCount is the number of unique keys available (--keys).
stmKeyCount int
// stmValSize is the byte length of each written value (--val-size).
stmValSize int
// stmWritePercent is the percentage of a transaction's keys that are
// overwritten (--txn-wr-percent).
stmWritePercent int
// stmMutex wraps each transaction in a distributed mutex (--use-mutex).
stmMutex bool
// mkSTM is the STM constructor chosen in stmFunc according to stmIsolation.
mkSTM func(context.Context, *v3.Client, func(v3sync.STM) error) (*v3.TxnResponse, error)
)
// init attaches the stm command to RootCmd and binds its flags.
func init() {
	RootCmd.AddCommand(stmCmd)

	flags := stmCmd.Flags()
	flags.StringVar(&stmIsolation, "isolation", "r", "Read Committed (c), Repeatable Reads (r), or Serializable (s)")
	flags.IntVar(&stmKeyCount, "keys", 1, "Total unique keys accessible by the benchmark")
	flags.IntVar(&stmTotal, "total", 10000, "Total number of completed STM transactions")
	flags.IntVar(&stmKeysPerTxn, "keys-per-txn", 1, "Number of keys to access per transaction")
	flags.IntVar(&stmWritePercent, "txn-wr-percent", 50, "Percentage of keys to overwrite per transaction")
	flags.BoolVar(&stmMutex, "use-mutex", false, "Wrap STM transaction in a distributed mutex")
	flags.IntVar(&stmValSize, "val-size", 8, "Value size of each STM put request")
}
// stmFunc runs the STM benchmark.
//
// After validating the flags it selects the STM constructor for the requested
// isolation level, starts one doSTM worker per client, and queues stmTotal
// transactions. Each transaction reads stmKeysPerTxn distinct random keys and
// overwrites stmWritePercent percent of them with random values. The report
// is printed once all workers have finished.
//
// Validation failures print a newline-terminated message to stderr and exit
// with status 1; an unknown isolation level shows the command usage instead.
func stmFunc(cmd *cobra.Command, args []string) {
	if stmKeyCount <= 0 {
		fmt.Fprintf(os.Stderr, "expected positive --keys, got (%v)\n", stmKeyCount)
		os.Exit(1)
	}

	if stmWritePercent < 0 || stmWritePercent > 100 {
		fmt.Fprintf(os.Stderr, "expected [0, 100] --txn-wr-percent, got (%v)\n", stmWritePercent)
		os.Exit(1)
	}

	// Must not exceed stmKeyCount, otherwise the distinct-key loop below
	// could never collect enough keys.
	if stmKeysPerTxn < 0 || stmKeysPerTxn > stmKeyCount {
		fmt.Fprintf(os.Stderr, "expected --keys-per-txn between 0 and %v, got (%v)\n", stmKeyCount, stmKeysPerTxn)
		os.Exit(1)
	}

	switch stmIsolation {
	case "c":
		mkSTM = v3sync.NewSTMReadCommitted
	case "r":
		mkSTM = v3sync.NewSTMRepeatable
	case "s":
		mkSTM = v3sync.NewSTMSerializable
	default:
		// cmd.Usage writes the usage text itself and returns only an error;
		// printing the return value unconditionally would emit "<nil>".
		if err := cmd.Usage(); err != nil {
			fmt.Fprintln(os.Stderr, err)
		}
		os.Exit(1)
	}

	requests := make(chan stmApply, totalClients)
	clients := mustCreateClients(totalClients, totalConns)

	bar = pb.New(stmTotal)
	bar.Format("Bom !")
	bar.Start()

	r := newReport()
	for i := range clients {
		wg.Add(1)
		go doSTM(clients[i], requests, r.Results())
	}

	go func() {
		for i := 0; i < stmTotal; i++ {
			// Collect stmKeysPerTxn distinct keys for this transaction.
			kset := make(map[string]struct{})
			for len(kset) != stmKeysPerTxn {
				k := make([]byte, 16)
				binary.PutVarint(k, int64(rand.Intn(stmKeyCount)))
				kset[string(k)] = struct{}{}
			}

			applyf := func(stm v3sync.STM) error {
				// Number of keys in this transaction to overwrite.
				wrs := int(float32(len(kset)*stmWritePercent) / 100.0)
				for k := range kset {
					stm.Get(k)
					if wrs > 0 {
						stm.Put(k, string(mustRandBytes(stmValSize)))
						wrs--
					}
				}
				return nil
			}

			requests <- applyf
		}
		close(requests)
	}()

	rc := r.Run()
	wg.Wait()
	close(r.Results())
	bar.Finish()
	fmt.Printf("%s", <-rc)
}
// doSTM is the worker loop for the STM benchmark.
//
// It runs every transaction body received from requests through mkSTM on
// client and sends the timing and error of each run to results. When
// stmMutex is set, each transaction is bracketed by Lock/Unlock on a mutex
// named "stmlock" that is created from a new session on client; the timed
// span includes the locking. It panics if the session cannot be created and
// calls wg.Done on return.
func doSTM(client *v3.Client, requests <-chan stmApply, results chan<- report.Result) {
	defer wg.Done()

	var lock *v3sync.Mutex
	if stmMutex {
		session, err := v3sync.NewSession(client)
		if err != nil {
			panic(err)
		}
		lock = v3sync.NewMutex(session, "stmlock")
	}
	useLock := lock != nil

	for apply := range requests {
		begin := time.Now()

		if useLock {
			lock.Lock(context.TODO())
		}
		_, err := mkSTM(context.TODO(), client, apply)
		if useLock {
			lock.Unlock(context.TODO())
		}

		results <- report.Result{Err: err, Start: begin, End: time.Now()}
		bar.Increment()
	}
}

View file

@ -0,0 +1,103 @@
// Copyright 2015 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package cmd
import (
"crypto/rand"
"fmt"
"log"
"os"
"strings"
"github.com/coreos/etcd/clientv3"
"github.com/coreos/etcd/pkg/report"
)
var (
// dialTotal counts the number of mustCreateConn calls so that endpoint
// connections can be handed out in round-robin order
// (it is incremented without synchronization in mustCreateConn).
dialTotal int
)
// mustCreateConn dials a new client to the next endpoint in round-robin
// order, applying the TLS settings and --user credentials when present.
// Any configuration or dial failure is reported on stderr and terminates the
// process with status 1.
func mustCreateConn() *clientv3.Client {
	target := endpoints[dialTotal%len(endpoints)]
	dialTotal++

	cfg := clientv3.Config{
		Endpoints:   []string{target},
		DialTimeout: dialTimeout,
	}

	if !tls.Empty() {
		tlsCfg, err := tls.ClientConfig()
		if err != nil {
			fmt.Fprintf(os.Stderr, "bad tls config: %v\n", err)
			os.Exit(1)
		}
		cfg.TLS = tlsCfg
	}

	if user != "" {
		// Only the first ':' separates the name from the password.
		parts := strings.SplitN(user, ":", 2)
		if len(parts) != 2 {
			fmt.Fprintf(os.Stderr, "bad user information: %s\n", user)
			os.Exit(1)
		}
		cfg.Username, cfg.Password = parts[0], parts[1]
	}

	client, err := clientv3.New(cfg)
	clientv3.SetLogger(log.New(os.Stderr, "grpc", 0))
	if err != nil {
		fmt.Fprintf(os.Stderr, "dial error: %v\n", err)
		os.Exit(1)
	}
	return client
}
// mustCreateClients opens totalConns connections via mustCreateConn and
// returns a slice of totalClients clients that share those connections in
// round-robin order.
//
// Asking for clients with zero connections would otherwise panic with an
// integer divide by zero; in keeping with the other must* helpers it is
// reported on stderr and the process exits with status 1.
func mustCreateClients(totalClients, totalConns uint) []*clientv3.Client {
	if totalClients > 0 && totalConns == 0 {
		fmt.Fprintf(os.Stderr, "cannot distribute %d clients over 0 connections\n", totalClients)
		os.Exit(1)
	}

	conns := make([]*clientv3.Client, totalConns)
	for i := range conns {
		conns[i] = mustCreateConn()
	}

	clients := make([]*clientv3.Client, totalClients)
	for i := range clients {
		clients[i] = conns[i%int(totalConns)]
	}
	return clients
}
// mustRandBytes returns n bytes read from crypto/rand. If the random source
// fails, it reports the error on stderr and exits with status 1.
func mustRandBytes(n int) []byte {
	buf := make([]byte, n)
	if _, err := rand.Read(buf); err != nil {
		fmt.Fprintf(os.Stderr, "failed to generate value: %v\n", err)
		os.Exit(1)
	}
	return buf
}
// newReport builds the report used by the benchmark commands. The number
// format is "%g" when --precise is set and "%4.4f" otherwise; --sample
// selects the sampling variant of the report.
func newReport() report.Report {
	format := "%4.4f"
	if precise {
		format = "%g"
	}

	if !sample {
		return report.NewReport(format)
	}
	return report.NewReportSample(format)
}

View file

@ -0,0 +1,211 @@
// Copyright 2015 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package cmd
import (
"encoding/binary"
"fmt"
"math/rand"
"os"
"sync/atomic"
"time"
v3 "github.com/coreos/etcd/clientv3"
"github.com/coreos/etcd/pkg/report"
"github.com/spf13/cobra"
"golang.org/x/net/context"
"gopkg.in/cheggaaa/pb.v1"
)
// watchCmd represents the watch command
var watchCmd = &cobra.Command{
Use: "watch",
Short: "Benchmark watch",
Long: `Benchmark watch tests the performance of processing watch requests and
sending events to watchers. It tests the sending performance by
changing the value of the watched keys with concurrent put
requests.
During the test, each watcher watches (--total/--watchers) keys
(a watcher might watch on the same key multiple times if
--watched-key-total is small).
Each key is watched by (--total/--watched-key-total) watchers.
`,
Run: watchFunc,
}
var (
// watchTotalStreams is the number of watcher streams (--watchers).
watchTotalStreams int
// watchTotal is the total number of watch requests (--total).
watchTotal int
// watchedKeyTotal is the number of keys to be watched (--watched-key-total).
watchedKeyTotal int
// watchPutRate is the number of puts per second (--put-rate).
watchPutRate int
// watchPutTotal is the number of put requests (--put-total).
watchPutTotal int
// watchKeySize is the byte length of each watched key (--key-size).
watchKeySize int
// watchKeySpaceSize bounds the number of distinct keys (--key-space-size).
watchKeySpaceSize int
// watchSeqKeys selects sequential instead of random keys (--sequential-keys).
watchSeqKeys bool
// eventsTotal is the number of watch events expected in the put phase;
// it is computed in watchFunc.
eventsTotal int
// nrWatchCompleted counts doWatch goroutines that have finished
// (accessed via sync/atomic).
nrWatchCompleted int32
// nrRecvCompleted counts watch events received (accessed via sync/atomic).
nrRecvCompleted int32
// watchCompletedNotifier signals the end of the watch-creation phase.
watchCompletedNotifier chan struct{}
// recvCompletedNotifier signals that all expected events were received.
recvCompletedNotifier chan struct{}
)
// init attaches the watch command to RootCmd and binds its flags.
func init() {
	RootCmd.AddCommand(watchCmd)

	flags := watchCmd.Flags()
	flags.IntVar(&watchTotalStreams, "watchers", 10000, "Total number of watchers")
	flags.IntVar(&watchTotal, "total", 100000, "Total number of watch requests")
	flags.IntVar(&watchedKeyTotal, "watched-key-total", 10000, "Total number of keys to be watched")

	flags.IntVar(&watchPutRate, "put-rate", 100, "Number of keys to put per second")
	flags.IntVar(&watchPutTotal, "put-total", 10000, "Number of put requests")

	flags.IntVar(&watchKeySize, "key-size", 32, "Key size of watch request")
	flags.IntVar(&watchKeySpaceSize, "key-space-size", 1, "Maximum possible keys")
	flags.BoolVar(&watchSeqKeys, "sequential-keys", false, "Use sequential keys")
}
// watchFunc runs the watch benchmark in two phases.
//
// Watching phase: watchTotal watch requests over the generated key set are
// distributed to watchTotalStreams watcher streams (doWatch) and the creation
// latency is reported.
//
// Put phase: watchPutTotal puts are issued against the watched keys at roughly
// watchPutRate per second, and the function waits until the receivers signal
// that eventsTotal events have arrived, then prints the second report.
func watchFunc(cmd *cobra.Command, args []string) {
if watchKeySpaceSize <= 0 {
fmt.Fprintf(os.Stderr, "expected positive --key-space-size, got (%v)", watchKeySpaceSize)
os.Exit(1)
}
// Build the list of watched keys: varint-encoded indices in
// [0, watchKeySpaceSize), sequential or random.
watched := make([]string, watchedKeyTotal)
// numWatchers counts how many watch requests were issued per key.
numWatchers := make(map[string]int)
for i := range watched {
k := make([]byte, watchKeySize)
// NOTE(review): binary.PutVarint panics if watchKeySize is too small for
// the encoded index — confirm whether --key-size should be validated.
if watchSeqKeys {
binary.PutVarint(k, int64(i%watchKeySpaceSize))
} else {
binary.PutVarint(k, int64(rand.Intn(watchKeySpaceSize)))
}
watched[i] = string(k)
}
requests := make(chan string, totalClients)
clients := mustCreateClients(totalClients, totalConns)
// Streams are spread over the clients in round-robin order.
streams := make([]v3.Watcher, watchTotalStreams)
for i := range streams {
streams[i] = v3.NewWatcher(clients[i%len(clients)])
}
// watching phase
bar = pb.New(watchTotal)
bar.Format("Bom !")
bar.Start()
atomic.StoreInt32(&nrWatchCompleted, int32(0))
watchCompletedNotifier = make(chan struct{})
r := report.NewReportRate("%4.4f")
for i := range streams {
go doWatch(streams[i], requests, r.Results())
}
go func() {
for i := 0; i < watchTotal; i++ {
// NOTE(review): i%len(watched) divides by zero when
// --watched-key-total is 0 — confirm whether that input is expected.
key := watched[i%len(watched)]
requests <- key
numWatchers[key]++
}
close(requests)
}()
rc := r.Run()
// Wait until every doWatch goroutine has drained the request channel.
<-watchCompletedNotifier
bar.Finish()
// NOTE(review): the recvWatchChan goroutines started by doWatch were handed
// this same results channel and still send to it during the put phase;
// presumably that is a send on a closed channel — verify.
close(r.Results())
fmt.Printf("Watch creation summary:\n%s", <-rc)
// put phase
// Each put is expected to produce one event per watcher of its key.
eventsTotal = 0
for i := 0; i < watchPutTotal; i++ {
eventsTotal += numWatchers[watched[i%len(watched)]]
}
bar = pb.New(eventsTotal)
bar.Format("Bom !")
bar.Start()
atomic.StoreInt32(&nrRecvCompleted, 0)
recvCompletedNotifier = make(chan struct{})
putreqc := make(chan v3.Op)
r = report.NewReportRate("%4.4f")
// One putter goroutine is started per put request; all of them consume
// from putreqc.
for i := 0; i < watchPutTotal; i++ {
go func(c *v3.Client) {
for op := range putreqc {
if _, err := c.Do(context.TODO(), op); err != nil {
fmt.Fprintf(os.Stderr, "failed to Put for watch benchmark: %v\n", err)
os.Exit(1)
}
}
}(clients[i%len(clients)])
}
go func() {
for i := 0; i < watchPutTotal; i++ {
putreqc <- v3.OpPut(watched[i%(len(watched))], "data")
// TODO: use a real rate-limiter instead of sleep.
// NOTE(review): this divides by zero when --put-rate is 0.
time.Sleep(time.Second / time.Duration(watchPutRate))
}
close(putreqc)
}()
rc = r.Run()
// Wait until a receiver reports that all expected events have arrived.
<-recvCompletedNotifier
bar.Finish()
close(r.Results())
fmt.Printf("Watch events received summary:\n%s", <-rc)
}
// doWatch creates one watch on stream for every key received from requests,
// sends the creation latency to results, and starts a recvWatchChan goroutine
// (sharing the same results channel) for each watch channel.
//
// When requests is closed, the goroutine counts itself as completed. The
// goroutine whose increment brings nrWatchCompleted to watchTotalStreams
// signals watchCompletedNotifier. The comparison uses the value returned by
// atomic.AddInt32, so exactly one goroutine observes the final count; a
// separate load after the add could be seen as "final" by several goroutines,
// leaving the extra senders blocked forever.
func doWatch(stream v3.Watcher, requests <-chan string, results chan<- report.Result) {
	for r := range requests {
		st := time.Now()
		wch := stream.Watch(context.TODO(), r)
		results <- report.Result{Start: st, End: time.Now()}
		bar.Increment()
		go recvWatchChan(wch, results)
	}
	if atomic.AddInt32(&nrWatchCompleted, 1) == int32(watchTotalStreams) {
		watchCompletedNotifier <- struct{}{}
	}
}
// recvWatchChan consumes watch responses from wch. For every event in a
// response it sends a result to results (timed from the moment the response
// was received), advances the progress bar and increments nrRecvCompleted.
//
// The goroutine whose increment brings the counter to eventsTotal signals
// recvCompletedNotifier once it has finished the current response, and then
// stops. The check uses the value returned by atomic.AddInt32, so only one
// goroutine can observe the final count; a separate load after the add could
// match in several goroutines, leaving the extra senders blocked forever.
func recvWatchChan(wch v3.WatchChan, results chan<- report.Result) {
	for r := range wch {
		st := time.Now()
		reachedTotal := false
		for range r.Events {
			results <- report.Result{Start: st, End: time.Now()}
			bar.Increment()
			if atomic.AddInt32(&nrRecvCompleted, 1) == int32(eventsTotal) {
				reachedTotal = true
			}
		}

		if reachedTotal {
			recvCompletedNotifier <- struct{}{}
			break
		}
	}
}

View file

@ -0,0 +1,118 @@
// Copyright 2016 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package cmd
import (
"fmt"
"sync"
"time"
v3 "github.com/coreos/etcd/clientv3"
"github.com/coreos/etcd/pkg/report"
"github.com/spf13/cobra"
"golang.org/x/net/context"
"gopkg.in/cheggaaa/pb.v1"
)
// watchGetCmd represents the watch-get command
var watchGetCmd = &cobra.Command{
Use: "watch-get",
Short: "Benchmark watch with get",
Long: `Benchmark for serialized key gets with many unsynced watchers`,
Run: watchGetFunc,
}
var (
// watchGetTotalWatchers is the total number of watchers (--watchers).
watchGetTotalWatchers int
// watchGetTotalStreams is the number of watcher streams (--streams).
watchGetTotalStreams int
// watchEvents is the number of events each watcher waits for (--events).
watchEvents int
// firstWatch ensures the getter goroutine is started only once, by the
// first watcher that has opened its watch.
firstWatch sync.Once
)
// init attaches the watch-get command to RootCmd and binds its flags.
func init() {
	RootCmd.AddCommand(watchGetCmd)

	flags := watchGetCmd.Flags()
	flags.IntVar(&watchGetTotalWatchers, "watchers", 10000, "Total number of watchers")
	flags.IntVar(&watchGetTotalStreams, "streams", 1, "Total number of watcher streams")
	flags.IntVar(&watchEvents, "events", 8, "Number of events per watcher")
}
// watchGetFunc runs the watch-get benchmark: it measures serializable Get
// latency on a dedicated client while watchGetTotalWatchers watchers replay
// watchEvents historical events of "watchkey".
func watchGetFunc(cmd *cobra.Command, args []string) {
clients := mustCreateClients(totalClients, totalConns)
// A separate single client/connection is used for the measured gets.
getClient := mustCreateClients(1, 1)
// setup keys for watchers
// watchRev remembers the revision of the first put so that every watcher
// can start from there.
watchRev := int64(0)
for i := 0; i < watchEvents; i++ {
v := fmt.Sprintf("%d", i)
resp, err := clients[0].Put(context.TODO(), "watchkey", v)
if err != nil {
panic(err)
}
if i == 0 {
watchRev = resp.Header.Revision
}
}
streams := make([]v3.Watcher, watchGetTotalStreams)
for i := range streams {
streams[i] = v3.NewWatcher(clients[i%len(clients)])
}
bar = pb.New(watchGetTotalWatchers * watchEvents)
bar.Format("Bom !")
bar.Start()
// report from trying to do serialized gets with concurrent watchers
r := newReport()
ctx, cancel := context.WithCancel(context.TODO())
// f is the getter loop: it issues serializable gets until ctx is cancelled
// and closes the results channel on exit. It is started by doUnsyncWatch
// through firstWatch.
// NOTE(review): if no watcher ever starts f, the results channel is never
// closed here — confirm that the final <-rc cannot block in that case.
f := func() {
defer close(r.Results())
for {
st := time.Now()
_, err := getClient[0].Get(ctx, "abc", v3.WithSerializable())
// A get interrupted by cancellation is not recorded.
if ctx.Err() != nil {
break
}
r.Results() <- report.Result{Err: err, Start: st, End: time.Now()}
}
}
wg.Add(watchGetTotalWatchers)
for i := 0; i < watchGetTotalWatchers; i++ {
go doUnsyncWatch(streams[i%len(streams)], watchRev, f)
}
rc := r.Run()
// Once all watchers have received their events, stop the getter loop.
wg.Wait()
cancel()
bar.Finish()
fmt.Printf("Get during watch summary:\n%s", <-rc)
}
// doUnsyncWatch opens a watch on "watchkey" starting at revision rev and
// blocks until watchEvents events have been received, advancing the progress
// bar as they arrive. The first watcher to open its watch starts f in a new
// goroutine (guarded by firstWatch). wg.Done is called on return.
//
// It panics if the watch channel cannot be opened, or if the channel is
// closed before all events were received: a closed channel yields empty
// responses forever, which would otherwise make the loop spin without ever
// terminating.
func doUnsyncWatch(stream v3.Watcher, rev int64, f func()) {
	defer wg.Done()
	wch := stream.Watch(context.TODO(), "watchkey", v3.WithRev(rev))
	if wch == nil {
		panic("could not open watch channel")
	}
	firstWatch.Do(func() { go f() })
	i := 0
	for i < watchEvents {
		wev, ok := <-wch
		if !ok {
			panic("watch channel closed before all events were received")
		}
		i += len(wev.Events)
		bar.Add(len(wev.Events))
	}
}

View file

@ -0,0 +1,92 @@
// Copyright 2015 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package cmd
import (
"fmt"
"os"
"time"
v3 "github.com/coreos/etcd/clientv3"
"github.com/coreos/etcd/pkg/report"
"github.com/spf13/cobra"
"golang.org/x/net/context"
"golang.org/x/time/rate"
"gopkg.in/cheggaaa/pb.v1"
)
// watchLatencyCmd is the cobra command for the "watch-latency" benchmark.
// Running it invokes watchLatencyFunc.
var watchLatencyCmd = &cobra.Command{
Use: "watch-latency",
Short: "Benchmark watch latency",
Long: `Benchmarks the latency for watches by measuring
the latency between writing to a key and receiving the
associated watch response.`,
Run: watchLatencyFunc,
}
// Flag values for the watch-latency command; they are bound to command-line
// flags in init.
var (
// watchLTotal is the total number of watch responses to measure ("total").
watchLTotal int
// watchLPutRate is the number of keys to put per second ("put-rate").
watchLPutRate int
// watchLKeySize is the size of the randomly generated key ("key-size").
watchLKeySize int
// watchLValueSize is the size of the randomly generated value ("val-size").
watchLValueSize int
)
// init registers the watch-latency command on RootCmd and binds its flags.
func init() {
	RootCmd.AddCommand(watchLatencyCmd)

	fs := watchLatencyCmd.Flags()
	fs.IntVar(&watchLTotal, "total", 10000, "Total number of watch responses.")
	fs.IntVar(&watchLPutRate, "put-rate", 100, "Number of keys to put per second")
	fs.IntVar(&watchLKeySize, "key-size", 32, "Key size of watch request")
	fs.IntVar(&watchLValueSize, "val-size", 32, "Val size of watch request")
}
// watchLatencyFunc runs the watch-latency benchmark. It watches a single
// random key, writes that key watchLTotal times at a rate limited to
// watchLPutRate puts per second, and for every put records the time between
// the put returning and the next response arriving on the watch channel. The
// collected report is printed to stdout.
//
// A failed put or an unexpectedly closed watch channel terminates the process
// with exit status 1. If the rate limiter refuses to wait (for example when
// the put rate is 0, which leaves the limiter with no burst capacity), the
// reason is written to stderr and the report covers only the samples taken so
// far.
func watchLatencyFunc(cmd *cobra.Command, args []string) {
	key := string(mustRandBytes(watchLKeySize))
	value := string(mustRandBytes(watchLValueSize))

	client := mustCreateConn()
	stream := v3.NewWatcher(client)
	wch := stream.Watch(context.TODO(), key)

	bar = pb.New(watchLTotal)
	bar.Format("Bom !")
	bar.Start()

	limiter := rate.NewLimiter(rate.Limit(watchLPutRate), watchLPutRate)
	r := newReport()
	rc := r.Run()

	for i := 0; i < watchLTotal; i++ {
		// limit key put as per reqRate
		if err := limiter.Wait(context.TODO()); err != nil {
			// Do not stop silently: an empty or truncated report with no
			// explanation is indistinguishable from a successful run.
			fmt.Fprintf(os.Stderr, "Stopping watch latency benchmark early: %v\n", err)
			break
		}

		_, err := client.Put(context.TODO(), key, value)
		if err != nil {
			fmt.Fprintf(os.Stderr, "Failed to Put for watch latency benchmark: %v\n", err)
			os.Exit(1)
		}

		st := time.Now()
		// A closed channel yields immediately; counting that as a watch
		// response would record meaningless near-zero latencies.
		if _, ok := <-wch; !ok {
			fmt.Fprintln(os.Stderr, "Watch channel closed unexpectedly during watch latency benchmark")
			os.Exit(1)
		}
		r.Results() <- report.Result{Err: err, Start: st, End: time.Now()}
		bar.Increment()
	}

	close(r.Results())
	bar.Finish()
	fmt.Printf("%s", <-rc)
}

16
vendor/github.com/coreos/etcd/tools/benchmark/doc.go generated vendored Normal file
View file

@ -0,0 +1,16 @@
// Copyright 2016 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// benchmark is a program for benchmarking etcd v3 API performance.
package main

29
vendor/github.com/coreos/etcd/tools/benchmark/main.go generated vendored Normal file
View file

@ -0,0 +1,29 @@
// Copyright 2015 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package main
import (
"fmt"
"os"
"github.com/coreos/etcd/tools/benchmark/cmd"
)
// main executes the benchmark root command; on failure the error is printed
// to stderr and the process exits with status -1.
func main() {
	err := cmd.RootCmd.Execute()
	if err == nil {
		return
	}
	fmt.Fprintln(os.Stderr, err)
	os.Exit(-1)
}

View file

@ -0,0 +1,74 @@
### etcd-dump-db
etcd-dump-db inspects etcd db files.
```
Usage:
etcd-dump-db [command]
Available Commands:
list-bucket bucket lists all buckets.
iterate-bucket iterate-bucket lists key-value pairs in reverse order.
hash hash computes the hash of db file.
Flags:
-h, --help[=false]: help for etcd-dump-db
Use "etcd-dump-db [command] --help" for more information about a command.
```
#### list-bucket [data dir or db file path]
Lists all buckets.
```
$ etcd-dump-db list-bucket agent01/agent.etcd
alarm
auth
authRoles
authUsers
cluster
key
lease
members
members_removed
meta
```
#### hash [data dir or db file path]
Computes the hash of db file.
```
$ etcd-dump-db hash agent01/agent.etcd
db path: agent01/agent.etcd/member/snap/db
Hash: 3700260467
$ etcd-dump-db hash agent02/agent.etcd
db path: agent02/agent.etcd/member/snap/db
Hash: 3700260467
$ etcd-dump-db hash agent03/agent.etcd
db path: agent03/agent.etcd/member/snap/db
Hash: 3700260467
```
#### iterate-bucket [data dir or db file path]
Lists key-value pairs in reverse order.
```
$ etcd-dump-db iterate-bucket agent03/agent.etcd --bucket=key --limit 3
key="\x00\x00\x00\x00\x005@x_\x00\x00\x00\x00\x00\x00\x00\tt", value="\n\x153640412599896088633_9"
key="\x00\x00\x00\x00\x005@x_\x00\x00\x00\x00\x00\x00\x00\bt", value="\n\x153640412599896088633_8"
key="\x00\x00\x00\x00\x005@x_\x00\x00\x00\x00\x00\x00\x00\at", value="\n\x153640412599896088633_7"
```

View file

@ -0,0 +1,83 @@
// Copyright 2016 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package main
import (
"fmt"
"path/filepath"
"github.com/boltdb/bolt"
"github.com/coreos/etcd/mvcc"
"github.com/coreos/etcd/mvcc/backend"
)
// snapDir returns the "member/snap" directory located under dataDir.
func snapDir(dataDir string) string {
	memberDir := filepath.Join(dataDir, "member")
	return filepath.Join(memberDir, "snap")
}
// getBuckets opens the bolt database at dbPath and returns the names of all
// of its top-level buckets. The database is closed before returning.
func getBuckets(dbPath string) (buckets []string, err error) {
	db, openErr := bolt.Open(dbPath, 0600, &bolt.Options{})
	if openErr != nil {
		return nil, openErr
	}
	defer db.Close()

	collect := func(name []byte, _ *bolt.Bucket) error {
		buckets = append(buckets, string(name))
		return nil
	}
	err = db.View(func(tx *bolt.Tx) error { return tx.ForEach(collect) })
	return buckets, err
}
// iterateBucket opens the bolt database at dbPath and prints the key-value
// pairs of the named bucket to stdout, from the last key to the first.
//
// limit caps the number of pairs printed; a limit of 0 prints every pair.
// An error is returned if the database cannot be opened or the bucket does
// not exist.
func iterateBucket(dbPath, bucket string, limit uint64) (err error) {
	db, derr := bolt.Open(dbPath, 0600, &bolt.Options{})
	if derr != nil {
		return derr
	}
	defer db.Close()

	return db.View(func(tx *bolt.Tx) error {
		b := tx.Bucket([]byte(bucket))
		if b == nil {
			return fmt.Errorf("got nil bucket for %s", bucket)
		}

		c := b.Cursor()

		// Count printed pairs explicitly instead of decrementing the unsigned
		// limit, so that "0 means unlimited" does not depend on wraparound.
		var printed uint64

		// iterate in reverse order (use First() and Next() for ascending order)
		for k, v := c.Last(); k != nil; k, v = c.Prev() {
			fmt.Printf("key=%q, value=%q\n", k, v)
			printed++
			if limit != 0 && printed >= limit {
				break
			}
		}
		return nil
	})
}
// getHash opens the backend stored at dbPath and returns its hash, computed
// with mvcc.DefaultIgnores. The backend is closed before returning so the
// database file is not left open.
func getHash(dbPath string) (hash uint32, err error) {
	b := backend.NewDefaultBackend(dbPath)
	defer b.Close()
	return b.Hash(mvcc.DefaultIgnores)
}
// TODO: revert by revision and find specified hash value
// currently, it's hard because lease is in separate bucket
// and does not modify revision

View file

@ -0,0 +1,16 @@
// Copyright 2016 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// etcd-dump-db inspects etcd db files.
package main

View file

@ -0,0 +1,130 @@
// Copyright 2016 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package main
import (
"fmt"
"log"
"os"
"path/filepath"
"strings"
"github.com/spf13/cobra"
)
// Cobra commands that make up the etcd-dump-db CLI. The sub-commands are
// attached to rootCommand in init.
var (
// rootCommand is the top-level "etcd-dump-db" command.
rootCommand = &cobra.Command{
Use: "etcd-dump-db",
Short: "etcd-dump-db inspects etcd db files.",
}
// listBucketCommand runs listBucketCommandFunc.
listBucketCommand = &cobra.Command{
Use: "list-bucket [data dir or db file path]",
Short: "bucket lists all buckets.",
Run: listBucketCommandFunc,
}
// iterateBucketCommand runs iterateBucketCommandFunc.
iterateBucketCommand = &cobra.Command{
Use: "iterate-bucket [data dir or db file path]",
Short: "iterate-bucket lists key-value pairs in reverse order.",
Run: iterateBucketCommandFunc,
}
// getHashCommand runs getHashCommandFunc.
getHashCommand = &cobra.Command{
Use: "hash [data dir or db file path]",
Short: "hash computes the hash of db file.",
Run: getHashCommandFunc,
}
)
// Flag values for the iterate-bucket command; bound to flags in init.
var (
// iterateBucketName is the bucket to iterate ("bucket" flag).
iterateBucketName string
// iterateBucketLimit is the maximum number of pairs to print ("limit" flag).
iterateBucketLimit uint64
)
// init binds the iterate-bucket flags and attaches every sub-command to
// rootCommand.
func init() {
	iterateBucketCommand.PersistentFlags().StringVar(&iterateBucketName, "bucket", "", "bucket name to iterate")
	// The limit is unsigned, so 0 is the only "iterate all" value.
	iterateBucketCommand.PersistentFlags().Uint64Var(&iterateBucketLimit, "limit", 0, "max number of key-value pairs to iterate (0 to iterate all)")

	rootCommand.AddCommand(listBucketCommand)
	rootCommand.AddCommand(iterateBucketCommand)
	rootCommand.AddCommand(getHashCommand)
}
// main executes the etcd-dump-db root command. On failure the error is
// written to stderr (so it does not mix with dumped data on stdout) and the
// process exits with status 1.
func main() {
	if err := rootCommand.Execute(); err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
}
// listBucketCommandFunc implements "list-bucket": it resolves the first
// argument to a db file (arguments not ending in "db" are treated as a data
// directory and mapped to <dir>/member/snap/db), then prints one bucket name
// per line. Any failure is fatal.
func listBucketCommandFunc(cmd *cobra.Command, args []string) {
	if len(args) == 0 {
		log.Fatalf("Must provide at least 1 argument (got %v)", args)
	}

	dbPath := args[0]
	if isDBFile := strings.HasSuffix(dbPath, "db"); !isDBFile {
		dbPath = filepath.Join(snapDir(dbPath), "db")
	}
	if !existFileOrDir(dbPath) {
		log.Fatalf("%q does not exist", dbPath)
	}

	names, err := getBuckets(dbPath)
	if err != nil {
		log.Fatal(err)
	}
	for i := range names {
		fmt.Println(names[i])
	}
}
// iterateBucketCommandFunc implements "iterate-bucket": it resolves the first
// argument to a db file (arguments not ending in "db" are treated as a data
// directory and mapped to <dir>/member/snap/db) and prints the contents of
// the bucket selected by the bucket flag, honouring the limit flag. Any
// failure, including an empty bucket name, is fatal.
func iterateBucketCommandFunc(cmd *cobra.Command, args []string) {
	if len(args) == 0 {
		log.Fatalf("Must provide at least 1 argument (got %v)", args)
	}

	dbPath := args[0]
	if isDBFile := strings.HasSuffix(dbPath, "db"); !isDBFile {
		dbPath = filepath.Join(snapDir(dbPath), "db")
	}
	if !existFileOrDir(dbPath) {
		log.Fatalf("%q does not exist", dbPath)
	}

	if iterateBucketName == "" {
		log.Fatal("got empty bucket name")
	}

	if err := iterateBucket(dbPath, iterateBucketName, iterateBucketLimit); err != nil {
		log.Fatal(err)
	}
}
// getHashCommandFunc implements "hash": it resolves the first argument to a
// db file (arguments not ending in "db" are treated as a data directory and
// mapped to <dir>/member/snap/db) and prints the path together with the hash
// returned by getHash. Any failure is fatal.
func getHashCommandFunc(cmd *cobra.Command, args []string) {
	if len(args) == 0 {
		log.Fatalf("Must provide at least 1 argument (got %v)", args)
	}

	dbPath := args[0]
	if isDBFile := strings.HasSuffix(dbPath, "db"); !isDBFile {
		dbPath = filepath.Join(snapDir(dbPath), "db")
	}
	if !existFileOrDir(dbPath) {
		log.Fatalf("%q does not exist", dbPath)
	}

	sum, err := getHash(dbPath)
	if err != nil {
		log.Fatal(err)
	}
	fmt.Printf("db path: %s\nHash: %d\n", dbPath, sum)
}

View file

@ -0,0 +1,22 @@
// Copyright 2016 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package main
import "os"
// existFileOrDir reports whether os.Stat succeeds for name. Any stat error,
// not only "does not exist", yields false.
func existFileOrDir(name string) bool {
	if _, err := os.Stat(name); err != nil {
		return false
	}
	return true
}

View file

@ -0,0 +1,16 @@
// Copyright 2016 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// etcd-dump-logs is a program for analyzing etcd server write ahead logs.
package main

View file

@ -0,0 +1,162 @@
// Copyright 2015 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package main
import (
"flag"
"fmt"
"log"
"path/filepath"
"time"
"github.com/coreos/etcd/etcdserver/etcdserverpb"
"github.com/coreos/etcd/pkg/pbutil"
"github.com/coreos/etcd/pkg/types"
"github.com/coreos/etcd/raft/raftpb"
"github.com/coreos/etcd/snap"
"github.com/coreos/etcd/wal"
"github.com/coreos/etcd/wal/walpb"
)
// main dumps the write-ahead log found under the directory given by
// -data-dir.
//
// The starting point is chosen by at most one of -start-index (dump from that
// raft index) or -start-snap (dump from the named snapshot file); with
// neither, the snapshot loaded from the snapshot directory is used, and a
// missing snapshot is reported as "empty". After printing the WAL metadata,
// every entry is printed as one line: term, index, entry type and a decoded
// summary of its payload ("???" when the payload cannot be decoded).
func main() {
	from := flag.String("data-dir", "", "")
	snapfile := flag.String("start-snap", "", "The base name of snapshot file to start dumping")
	index := flag.Uint64("start-index", 0, "The index to start dumping")
	flag.Parse()

	if *from == "" {
		log.Fatal("Must provide -data-dir flag.")
	}
	if *snapfile != "" && *index != 0 {
		log.Fatal("start-snap and start-index flags cannot be used together.")
	}

	var (
		walsnap  walpb.Snapshot
		snapshot *raftpb.Snapshot
		err      error
	)

	isIndex := *index != 0

	if isIndex {
		fmt.Printf("Start dumping log entries from index %d.\n", *index)
		walsnap.Index = *index
	} else {
		if *snapfile == "" {
			ss := snap.New(snapDir(*from))
			snapshot, err = ss.Load()
		} else {
			snapshot, err = snap.Read(filepath.Join(snapDir(*from), *snapfile))
		}

		switch err {
		case nil:
			walsnap.Index, walsnap.Term = snapshot.Metadata.Index, snapshot.Metadata.Term
			nodes := genIDSlice(snapshot.Metadata.ConfState.Nodes)
			fmt.Printf("Snapshot:\nterm=%d index=%d nodes=%s\n",
				walsnap.Term, walsnap.Index, nodes)
		case snap.ErrNoSnapshot:
			fmt.Printf("Snapshot:\nempty\n")
		default:
			log.Fatalf("Failed loading snapshot: %v", err)
		}
		fmt.Println("Start dumping log entries from snapshot.")
	}

	w, err := wal.OpenForRead(walDir(*from), walsnap)
	if err != nil {
		log.Fatalf("Failed opening WAL: %v", err)
	}
	wmetadata, state, ents, err := w.ReadAll()
	w.Close()
	// When dumping from an explicit index there is no snapshot record to
	// match, so ErrSnapshotNotFound is tolerated in that mode only.
	if err != nil && (!isIndex || err != wal.ErrSnapshotNotFound) {
		log.Fatalf("Failed reading WAL: %v", err)
	}
	id, cid := parseWALMetadata(wmetadata)
	vid := types.ID(state.Vote)
	fmt.Printf("WAL metadata:\nnodeID=%s clusterID=%s term=%d commitIndex=%d vote=%s\n",
		id, cid, state.Term, state.Commit, vid)

	fmt.Printf("WAL entries:\n")
	// The WAL may hold no entries after the starting point; indexing the last
	// element unconditionally would panic.
	if len(ents) > 0 {
		fmt.Printf("lastIndex=%d\n", ents[len(ents)-1].Index)
	}
	fmt.Printf("%4s\t%10s\ttype\tdata\n", "term", "index")
	for _, e := range ents {
		msg := fmt.Sprintf("%4d\t%10d", e.Term, e.Index)
		switch e.Type {
		case raftpb.EntryNormal:
			msg = fmt.Sprintf("%s\tnorm", msg)

			// Try the internal raft request encoding first, then fall back to
			// the plain Request encoding.
			var rr etcdserverpb.InternalRaftRequest
			if err := rr.Unmarshal(e.Data); err == nil {
				msg = fmt.Sprintf("%s\t%s", msg, rr.String())
				break
			}

			var r etcdserverpb.Request
			if err := r.Unmarshal(e.Data); err == nil {
				switch r.Method {
				case "":
					msg = fmt.Sprintf("%s\tnoop", msg)
				case "SYNC":
					msg = fmt.Sprintf("%s\tmethod=SYNC time=%q", msg, time.Unix(0, r.Time))
				case "QGET", "DELETE":
					msg = fmt.Sprintf("%s\tmethod=%s path=%s", msg, r.Method, excerpt(r.Path, 64, 64))
				default:
					msg = fmt.Sprintf("%s\tmethod=%s path=%s val=%s", msg, r.Method, excerpt(r.Path, 64, 64), excerpt(r.Val, 128, 0))
				}
				break
			}
			msg = fmt.Sprintf("%s\t???", msg)
		case raftpb.EntryConfChange:
			msg = fmt.Sprintf("%s\tconf", msg)
			var r raftpb.ConfChange
			if err := r.Unmarshal(e.Data); err != nil {
				msg = fmt.Sprintf("%s\t???", msg)
			} else {
				msg = fmt.Sprintf("%s\tmethod=%s id=%s", msg, r.Type, types.ID(r.NodeID))
			}
		}
		fmt.Println(msg)
	}
}
// walDir returns the "member/wal" directory located under dataDir.
func walDir(dataDir string) string {
	memberDir := filepath.Join(dataDir, "member")
	return filepath.Join(memberDir, "wal")
}
// snapDir returns the "member/snap" directory located under dataDir.
func snapDir(dataDir string) string {
	memberDir := filepath.Join(dataDir, "member")
	return filepath.Join(memberDir, "snap")
}
// parseWALMetadata decodes b as an etcdserverpb.Metadata message via
// pbutil.MustUnmarshal and returns its node ID and cluster ID.
func parseWALMetadata(b []byte) (id, cid types.ID) {
	var md etcdserverpb.Metadata
	pbutil.MustUnmarshal(&md, b)
	return types.ID(md.NodeID), types.ID(md.ClusterID)
}
// genIDSlice converts a slice of raw uint64 identifiers into a slice of
// types.ID of the same length and order.
func genIDSlice(a []uint64) []types.ID {
	out := make([]types.ID, 0, len(a))
	for _, raw := range a {
		out = append(out, types.ID(raw))
	}
	return out
}
// excerpt shortens str for display. When str is longer than pre+suf bytes it
// returns the first pre bytes and the last suf bytes, each double-quoted and
// safely escaped with Go syntax, joined by "...". Otherwise nothing would be
// elided, so the whole string is returned as a single quoted value.
func excerpt(str string, pre, suf int) string {
	// Use >= rather than >: when pre+suf equals the length the two halves
	// already cover the entire string and an ellipsis would be misleading.
	if pre+suf >= len(str) {
		return fmt.Sprintf("%q", str)
	}
	return fmt.Sprintf("%q...%q", str[:pre], str[len(str)-suf:])
}

View file

@ -0,0 +1,4 @@
agent-1: mkdir -p agent-1 && cd agent-1 && ../bin/etcd-agent -etcd-path ../bin/etcd -port 127.0.0.1:19027 -use-root=false
agent-2: mkdir -p agent-2 && cd agent-2 && ../bin/etcd-agent -etcd-path ../bin/etcd -port 127.0.0.1:29027 -use-root=false
agent-3: mkdir -p agent-3 && cd agent-3 && ../bin/etcd-agent -etcd-path ../bin/etcd -port 127.0.0.1:39027 -use-root=false
stresser: sleep 1s && bin/etcd-tester -agent-endpoints "127.0.0.1:19027,127.0.0.1:29027,127.0.0.1:39027" -client-ports 12379,22379,32379 -peer-ports 12380,22380,32380

View file

@ -0,0 +1,51 @@
# etcd functional test suite
etcd functional test suite tests the functionality of an etcd cluster with a focus on failure resistance under high pressure. It sets up an etcd cluster and injects failures into the cluster by killing the process or isolating the network of the process. It expects the etcd cluster to recover within a short amount of time after fixing the fault.
etcd functional test suite has two components: etcd-agent and etcd-tester. etcd-agent runs on every test machine and etcd-tester is a single controller of the test. etcd-tester controls all the etcd-agents to start etcd clusters and simulate various failure cases.
## requirements
The environment of the cluster must be stable enough, so etcd test suite can assume that most of the failures are generated by itself.
## etcd agent
etcd agent is a daemon on each machine. It can start, stop, restart, isolate and terminate an etcd process. The agent exposes this functionality via HTTP RPC.
## etcd tester
etcd functional tester controls the progress of the functional tests. It calls the RPC of the etcd agent to simulate various test cases. For example, it can start a three-member cluster by sending three start RPC calls to three different etcd agents. It can make one of the members fail by sending a stop RPC call to one etcd agent.
## with Docker (optionally)
To run the functional tests using Docker, the provided script can be used to set up an environment using Docker Compose.
Script (on linux):
```sh
./tools/functional-tester/test
```
Running the script requires:
- Docker 1.9+ (with networking support) - to create isolated network
- docker-compose - to create etcd cluster and tester
- A multi-arch Go toolchain (OSX)
Notes:
- Docker image is based on Alpine Linux OS running in privileged mode to allow iptables manipulation.
- To specify testing parameters (etcd-tester arguments) modify tools/functional-tester/docker/docker-compose.yml or start etcd-tester manually
- (OSX) make sure that etcd binary is built for linux/amd64 (eg. `rm bin/etcd;GOOS=linux GOARCH=amd64 ./tools/functional-tester/test`) otherwise you get `exec format error`
## with Goreman
To run the functional tests on a single machine using Goreman, build with the provided build script and run with the provided Procfile:
```sh
./tools/functional-tester/build
goreman -f tools/functional-tester/Procfile start
```
Notes:
- The etcd-agent will not run with root privileges; iptables manipulation is disabled.
- To specify testing parameters (etcd-tester arguments) modify tools/functional-tester/Procfile or start etcd-tester manually

View file

@ -0,0 +1,10 @@
#!/usr/bin/env bash
# Builds the etcd-agent and etcd-tester binaries into bin/.

# Refuse to run unless the script was invoked through a path containing
# tools/functional-tester/build; the relative paths below depend on it.
if ! [[ "$0" =~ "tools/functional-tester/build" ]]; then
echo "must be run from repository root"
exit 255
fi
# Build both tools with cgo disabled.
CGO_ENABLED=0 go build -a -installsuffix cgo -ldflags "-s" -o bin/etcd-agent ./cmd/tools/functional-tester/etcd-agent
CGO_ENABLED=0 go build -a -installsuffix cgo -ldflags "-s" -o bin/etcd-tester ./cmd/tools/functional-tester/etcd-tester

View file

@ -0,0 +1,8 @@
# Image for the functional tester: contains etcd, etcd-agent and etcd-tester.
FROM alpine
# Install iptables and sudo.
RUN apk update
RUN apk add -v iptables sudo
# Copy the prebuilt binaries into the image root.
ADD bin/etcd-agent /
ADD bin/etcd /
ADD bin/etcd-tester /
# Pre-create the failure_archive directory at the image root.
RUN mkdir /failure_archive
# By default run the agent, pointing it at the bundled etcd binary.
CMD ["./etcd-agent", "-etcd-path", "./etcd"]

View file

@ -0,0 +1,28 @@
# build according to the provided Dockerfile
a1:
build: .
privileged: true
net: etcd-functional
a2:
build: .
privileged: true
net: etcd-functional
a3:
build: .
privileged: true
net: etcd-functional
tester:
build: .
privileged: true
net: etcd-functional
command:
- /etcd-tester
- -agent-endpoints
- "172.20.0.2:9027,172.20.0.3:9027,172.20.0.4:9027"
- -limit
- "1"
- -stress-key-count
- "1"
- -stress-key-size
- "1"

View file

@ -0,0 +1,252 @@
// Copyright 2015 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package main
import (
"fmt"
"os"
"os/exec"
"path/filepath"
"syscall"
"time"
"github.com/coreos/etcd/pkg/fileutil"
"github.com/coreos/etcd/pkg/netutil"
"github.com/coreos/etcd/tools/functional-tester/etcd-agent/client"
)
// Values stored in Agent.state to track the etcd process.
const (
// stateUninitialized is the state of a new agent and of one after cleanup.
stateUninitialized = "uninitialized"
// stateStarted is set once the etcd process has been started.
stateStarted = "started"
// stateStopped is set after the process has been stopped with a signal.
stateStopped = "stopped"
// stateTerminated is set after the process was stopped and its data dir removed.
stateTerminated = "terminated"
)
// Agent controls a single etcd process: it starts and stops the process and
// manages the log file the process writes to.
type Agent struct {
state string // the state of etcd process
// cmd is the etcd command; it is replaced on every start.
cmd *exec.Cmd
// logfile receives the stdout and stderr of the etcd process.
logfile *os.File
// cfg is the configuration the agent was created with.
cfg AgentConfig
}
// AgentConfig holds the settings used to create an Agent.
type AgentConfig struct {
// EtcdPath is the path of the etcd binary to execute.
EtcdPath string
// LogDir is the directory in which the etcd.log file is created.
LogDir string
// FailpointAddr is passed to etcd through the GOFAIL_HTTP environment variable.
FailpointAddr string
// UseRoot enables the network operations (dropPort, recoverPort,
// setLatency); when false they are no-ops.
UseRoot bool
}
// newAgent creates an Agent for the etcd binary at cfg.EtcdPath. It fails if
// the binary cannot be stat'ed, if cfg.LogDir cannot be prepared, or if the
// etcd.log file inside it cannot be created. The returned agent is in the
// uninitialized state.
func newAgent(cfg AgentConfig) (*Agent, error) {
	// the etcd binary must exist
	if _, err := os.Stat(cfg.EtcdPath); err != nil {
		return nil, err
	}
	etcdCmd := exec.Command(cfg.EtcdPath)

	if err := fileutil.TouchDirAll(cfg.LogDir); err != nil {
		return nil, err
	}

	logPath := filepath.Join(cfg.LogDir, "etcd.log")
	logf, err := os.Create(logPath)
	if err != nil {
		return nil, err
	}

	agent := &Agent{
		state:   stateUninitialized,
		cmd:     etcdCmd,
		logfile: logf,
		cfg:     cfg,
	}
	return agent, nil
}
// start launches a new etcd process with the given args. The process gets an
// environment containing only GOFAIL_HTTP and writes stdout and stderr to the
// agent's log file. On success the agent state becomes started.
func (a *Agent) start(args ...string) error {
	proc := exec.Command(a.cmd.Path, args...)
	proc.Env = []string{"GOFAIL_HTTP=" + a.cfg.FailpointAddr}
	proc.Stdout = a.logfile
	proc.Stderr = a.logfile
	a.cmd = proc

	if err := proc.Start(); err != nil {
		return err
	}
	a.state = stateStarted
	return nil
}
// stopWithSig stops the etcd process the agent started by sending it sig.
// It does nothing unless the agent is in the started state; on success the
// state becomes stopped.
func (a *Agent) stopWithSig(sig os.Signal) error {
	if a.state == stateStarted {
		if err := stopWithSig(a.cmd, sig); err != nil {
			return err
		}
		a.state = stateStopped
	}
	return nil
}
// stopWithSig sends sig to the process behind cmd and waits for it to exit.
// If the process has not exited within five seconds it is killed, and the
// function then waits for the exit to be observed. The returned error is the
// one from signalling or from waiting on the process.
func stopWithSig(cmd *exec.Cmd, sig os.Signal) error {
	if err := cmd.Process.Signal(sig); err != nil {
		return err
	}

	waitc := make(chan error)
	go func() {
		defer close(waitc)
		_, werr := cmd.Process.Wait()
		waitc <- werr
	}()

	select {
	case werr := <-waitc:
		return werr
	case <-time.After(5 * time.Second):
		// graceful stop took too long; force it
		cmd.Process.Kill()
	}
	return <-waitc
}
// restart starts etcd again, reusing the arguments of the previous command
// (everything after the program name).
func (a *Agent) restart() error {
	prevArgs := a.cmd.Args[1:]
	return a.start(prevArgs...)
}
// cleanup stops etcd with SIGQUIT, archives the log directory and the data
// directory, recreates an empty etcd.log, and finally attempts to drop the
// page cache. A failure to drop the cache is only logged.
func (a *Agent) cleanup() error {
	// SIGQUIT so that the process exits with a stack trace
	if err := a.stopWithSig(syscall.SIGQUIT); err != nil {
		return err
	}
	a.state = stateUninitialized

	a.logfile.Close()
	logDir := a.cfg.LogDir
	if err := archiveLogAndDataDir(logDir, a.dataDir()); err != nil {
		return err
	}

	// the log directory was moved away by archiving; set up a fresh one
	if err := fileutil.TouchDirAll(logDir); err != nil {
		return err
	}
	newLog, err := os.Create(filepath.Join(logDir, "etcd.log"))
	if err != nil {
		return err
	}
	a.logfile = newLog

	// https://www.kernel.org/doc/Documentation/sysctl/vm.txt
	// https://github.com/torvalds/linux/blob/master/fs/drop_caches.c
	dropCaches := exec.Command("/bin/sh", "-c", `echo "echo 1 > /proc/sys/vm/drop_caches" | sudo sh`)
	if runErr := dropCaches.Run(); runErr != nil {
		plog.Infof("error when cleaning page cache (%v)", runErr)
	}
	return nil
}
// terminate stops the existing etcd process the agent started (using
// SIGTERM) and removes the data dir. On success the state becomes terminated.
func (a *Agent) terminate() error {
	if err := a.stopWithSig(syscall.SIGTERM); err != nil {
		return err
	}
	if err := os.RemoveAll(a.dataDir()); err != nil {
		return err
	}
	a.state = stateTerminated
	return nil
}
// dropPort asks netutil to drop traffic on port. It is a no-op when the
// agent is configured without root.
func (a *Agent) dropPort(port int) error {
	if a.cfg.UseRoot {
		return netutil.DropPort(port)
	}
	return nil
}
// recoverPort asks netutil to recover port. It is a no-op when the agent is
// configured without root.
func (a *Agent) recoverPort(port int) error {
	if a.cfg.UseRoot {
		return netutil.RecoverPort(port)
	}
	return nil
}
// setLatency applies network latency through netutil using ms and rv; an ms
// of 0 removes any latency instead. It is a no-op when the agent is
// configured without root.
func (a *Agent) setLatency(ms, rv int) error {
	switch {
	case !a.cfg.UseRoot:
		return nil
	case ms == 0:
		return netutil.RemoveLatency()
	default:
		return netutil.SetLatency(ms, rv)
	}
}
// status reports the agent's current state as a client.Status.
func (a *Agent) status() client.Status {
	current := client.Status{State: a.state}
	return current
}
// dataDir returns the data directory of the etcd command. It is the value
// following a "--data-dir" argument when one is present; otherwise, or when
// "--data-dir" is the last argument and has no value, it falls back to
// "*.etcd" joined to the command path.
func (a *Agent) dataDir() string {
	args := a.cmd.Args
	// only parse the simple case like "--data-dir /var/lib/etcd"
	for i, arg := range args {
		// The bounds check prevents an index-out-of-range panic when the flag
		// is given without a value.
		if arg == "--data-dir" && i+1 < len(args) {
			return args[i+1]
		}
	}
	return filepath.Join(a.cmd.Path, "*.etcd")
}
// existDir reports whether fpath can be stat'ed and is a directory. Any stat
// error yields false.
func existDir(fpath string) bool {
	info, err := os.Stat(fpath)
	if err != nil {
		return false
	}
	return info.IsDir()
}
// archiveLogAndDataDir moves logDir and datadir into a new directory under
// "failure_archive" named after the current time in RFC 3339 format. If a
// directory with that name already exists, a name one second later is used.
// A source that does not exist is skipped; any other rename error is
// returned.
func archiveLogAndDataDir(logDir string, datadir string) error {
	archiveName := func(ts time.Time) string {
		return filepath.Join("failure_archive", fmt.Sprint(ts.Format(time.RFC3339)))
	}

	dir := archiveName(time.Now())
	if existDir(dir) {
		dir = archiveName(time.Now().Add(time.Second))
	}
	if err := fileutil.TouchDirAll(dir); err != nil {
		return err
	}

	for _, src := range []string{logDir, datadir} {
		dst := filepath.Join(dir, filepath.Base(src))
		if err := os.Rename(src, dst); err != nil && !os.IsNotExist(err) {
			return err
		}
	}
	return nil
}

View file

@ -0,0 +1,92 @@
// Copyright 2015 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package main
import (
"io/ioutil"
"os"
"path/filepath"
"syscall"
"testing"
)
// etcdPath is the etcd binary used by the tests: $GOPATH/bin/etcd.
var etcdPath = filepath.Join(os.Getenv("GOPATH"), "bin/etcd")
// TestAgentStart checks that an agent can start etcd with a data directory.
func TestAgentStart(t *testing.T) {
	// "etcd.log" is the agent's log directory and contains the log file, so
	// it has to be removed recursively; os.Remove fails on a non-empty dir.
	defer os.RemoveAll("etcd.log")

	a, dir := newTestAgent(t)
	defer a.terminate()

	err := a.start("--data-dir", dir)
	if err != nil {
		t.Fatal(err)
	}
}
// TestAgentRestart checks that etcd can be started, stopped with SIGTERM and
// then restarted by the agent.
func TestAgentRestart(t *testing.T) {
	// "etcd.log" is the agent's log directory and contains the log file, so
	// it has to be removed recursively; os.Remove fails on a non-empty dir.
	defer os.RemoveAll("etcd.log")

	a, dir := newTestAgent(t)
	defer a.terminate()

	err := a.start("--data-dir", dir)
	if err != nil {
		t.Fatal(err)
	}

	err = a.stopWithSig(syscall.SIGTERM)
	if err != nil {
		t.Fatal(err)
	}
	err = a.restart()
	if err != nil {
		t.Fatal(err)
	}
}
// TestAgentTerminate checks that terminating the agent removes the data
// directory.
func TestAgentTerminate(t *testing.T) {
	// "etcd.log" is the agent's log directory and contains the log file, so
	// it has to be removed recursively; os.Remove fails on a non-empty dir.
	defer os.RemoveAll("etcd.log")

	a, dir := newTestAgent(t)

	err := a.start("--data-dir", dir)
	if err != nil {
		t.Fatal(err)
	}

	err = a.terminate()
	if err != nil {
		t.Fatal(err)
	}

	if _, err := os.Stat(dir); !os.IsNotExist(err) {
		// err is nil when the directory still exists, so spell out what was
		// expected instead of failing with a bare <nil>.
		t.Fatalf("expected data dir %q to be removed, stat error = %v", dir, err)
	}
}
// newTestAgent builds an agent for etcdPath that logs into "etcd.log" and
// returns it together with a freshly created temporary data directory. Any
// setup failure aborts the test.
func newTestAgent(t *testing.T) (*Agent, string) {
	cfg := AgentConfig{EtcdPath: etcdPath, LogDir: "etcd.log"}
	agent, err := newAgent(cfg)
	if err != nil {
		t.Fatal(err)
	}

	tmp, err := ioutil.TempDir(os.TempDir(), "etcd-agent")
	if err != nil {
		t.Fatal(err)
	}
	return agent, tmp
}

View file

@ -0,0 +1,118 @@
// Copyright 2015 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package client
import "net/rpc"
// Status is the reply of the agent's status RPC.
type Status struct {
// State gives the human-readable status of an agent (e.g., "started" or "terminated")
State string
// TODO: gather more information
// TODO: memory usage, raft information, etc..
}
// Agent is the client-side interface for controlling a remote etcd-agent.
type Agent interface {
// ID returns an identifier for the agent.
ID() uint64
// Start starts a new etcd with the given args on the agent machine.
Start(args ...string) (int, error)
// Stop stops the existing etcd the agent started.
Stop() error
// Restart restarts the existing etcd the agent stopped.
Restart() (int, error)
// Cleanup stops the existing etcd the agent started, then archives log and its data dir.
Cleanup() error
// Terminate stops the existing etcd the agent started and removes its data dir.
Terminate() error
// DropPort drops all network packets at the given port.
DropPort(port int) error
// RecoverPort stops dropping all network packets at the given port.
RecoverPort(port int) error
// SetLatency slows down network by introducing latency.
SetLatency(ms, rv int) error
// RemoveLatency removes latency introduced by SetLatency.
RemoveLatency() error
// Status returns the status of etcd on the agent
Status() (Status, error)
}
// agent implements Agent on top of a net/rpc client.
type agent struct {
// endpoint is the address that was passed to NewAgent.
endpoint string
// rpcClient is the connection used for every RPC call.
rpcClient *rpc.Client
}
// NewAgent dials the etcd-agent HTTP RPC server at endpoint over TCP and
// returns an Agent bound to that connection.
func NewAgent(endpoint string) (Agent, error) {
	conn, err := rpc.DialHTTP("tcp", endpoint)
	if err != nil {
		return nil, err
	}
	return &agent{endpoint: endpoint, rpcClient: conn}, nil
}
// Start calls Agent.RPCStart with args and returns the pid from the reply,
// or -1 together with the error when the call fails.
func (a *agent) Start(args ...string) (int, error) {
	var pid int
	if err := a.rpcClient.Call("Agent.RPCStart", args, &pid); err != nil {
		return -1, err
	}
	return pid, nil
}
// Stop calls Agent.RPCStop with an empty argument and no reply value.
func (a *agent) Stop() error {
	var noArgs struct{}
	return a.rpcClient.Call("Agent.RPCStop", noArgs, nil)
}
// Restart calls Agent.RPCRestart and returns the pid from the reply, or -1
// together with the error when the call fails.
func (a *agent) Restart() (int, error) {
	var pid int
	if err := a.rpcClient.Call("Agent.RPCRestart", struct{}{}, &pid); err != nil {
		return -1, err
	}
	return pid, nil
}
// Cleanup calls Agent.RPCCleanup with an empty argument and no reply value.
func (a *agent) Cleanup() error {
	var noArgs struct{}
	return a.rpcClient.Call("Agent.RPCCleanup", noArgs, nil)
}
// Terminate calls Agent.RPCTerminate with an empty argument and no reply
// value.
func (a *agent) Terminate() error {
	var noArgs struct{}
	return a.rpcClient.Call("Agent.RPCTerminate", noArgs, nil)
}
// DropPort calls Agent.RPCDropPort with port as the argument.
func (a *agent) DropPort(port int) error {
	err := a.rpcClient.Call("Agent.RPCDropPort", port, nil)
	return err
}
// RecoverPort calls Agent.RPCRecoverPort with port as the argument.
func (a *agent) RecoverPort(port int) error {
	err := a.rpcClient.Call("Agent.RPCRecoverPort", port, nil)
	return err
}
// SetLatency calls Agent.RPCSetLatency, sending ms and rv as a two-element
// int slice in that order.
func (a *agent) SetLatency(ms, rv int) error {
	params := []int{ms, rv}
	return a.rpcClient.Call("Agent.RPCSetLatency", params, nil)
}
// RemoveLatency calls Agent.RPCRemoveLatency with an empty argument and no
// reply value.
func (a *agent) RemoveLatency() error {
	var noArgs struct{}
	return a.rpcClient.Call("Agent.RPCRemoveLatency", noArgs, nil)
}
// Status calls Agent.RPCStatus and returns the decoded reply along with any
// error from the call.
func (a *agent) Status() (Status, error) {
	var reply Status
	callErr := a.rpcClient.Call("Agent.RPCStatus", struct{}{}, &reply)
	return reply, callErr
}
// ID is required by the Agent interface but is not implemented for the RPC
// client; calling it panics.
func (a *agent) ID() uint64 {
panic("not implemented")
}

View file

@ -0,0 +1,16 @@
// Copyright 2016 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Package client provides a client implementation to control an etcd-agent.
package client

View file

@ -0,0 +1,16 @@
// Copyright 2016 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// etcd-agent is a daemon for controlling an etcd process via HTTP RPC.
package main

View file

@ -0,0 +1,59 @@
// Copyright 2015 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package main
import (
"flag"
"fmt"
"os"
"path/filepath"
"github.com/coreos/pkg/capnslog"
)
// plog is the package-level capnslog logger, registered under the
// "github.com/coreos/etcd" repository and the "etcd-agent" package name.
var plog = capnslog.NewPackageLogger("github.com/coreos/etcd", "etcd-agent")
// main parses the agent's command-line flags, refuses to continue when
// --use-root is set but the process is not running as uid 0, builds the agent
// from the resulting configuration, starts the RPC server and then blocks
// forever.
func main() {
	var (
		etcdPath      = flag.String("etcd-path", filepath.Join(os.Getenv("GOPATH"), "bin/etcd"), "the path to etcd binary")
		etcdLogDir    = flag.String("etcd-log-dir", "etcd-log", "directory to store etcd logs")
		port          = flag.String("port", ":9027", "port to serve agent server")
		useRoot       = flag.Bool("use-root", true, "use root permissions")
		failpointAddr = flag.String("failpoint-addr", ":2381", "interface for gofail's HTTP server")
	)
	flag.Parse()

	cfg := AgentConfig{
		EtcdPath:      *etcdPath,
		LogDir:        *etcdLogDir,
		FailpointAddr: *failpointAddr,
		UseRoot:       *useRoot,
	}

	switch {
	case *useRoot && os.Getuid() != 0:
		fmt.Println("got --use-root=true but not root user")
		os.Exit(1)
	case !*useRoot:
		fmt.Println("root permissions disabled, agent will not modify network")
	}

	agent, err := newAgent(cfg)
	if err != nil {
		plog.Fatal(err)
	}
	agent.serveRPC(*port)

	// serveRPC returns after spawning the HTTP server goroutine; park the
	// main goroutine so the process stays alive.
	select {}
}

View file

@ -0,0 +1,131 @@
// Copyright 2015 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package main
import (
"fmt"
"net"
"net/http"
"net/rpc"
"syscall"
"github.com/coreos/etcd/tools/functional-tester/etcd-agent/client"
)
// serveRPC registers the agent with the default net/rpc server, installs the
// RPC HTTP handlers on the default mux, binds a TCP listener on port and
// serves it in a background goroutine. A listen failure is fatal.
func (a *Agent) serveRPC(port string) {
	rpc.Register(a)
	rpc.HandleHTTP()

	listener, err := net.Listen("tcp", port)
	if err != nil {
		plog.Fatal(err)
	}
	plog.Println("agent listening on", port)

	go func() {
		http.Serve(listener, nil)
	}()
}
// RPCStart is the RPC handler that starts etcd with args. On success it stores
// the pid of the agent's current command in *pid; on failure it logs and
// returns the error from start.
func (a *Agent) RPCStart(args []string, pid *int) error {
	plog.Printf("start etcd with args %v", args)
	if err := a.start(args...); err != nil {
		plog.Println("error starting etcd", err)
		return err
	}
	*pid = a.cmd.Process.Pid
	return nil
}
// RPCStop is the RPC handler that stops etcd by calling stopWithSig with
// SIGTERM. A failure is logged and returned to the caller.
func (a *Agent) RPCStop(args struct{}, reply *struct{}) error {
	plog.Printf("stop etcd")
	if err := a.stopWithSig(syscall.SIGTERM); err != nil {
		plog.Println("error stopping etcd", err)
		return err
	}
	return nil
}
// RPCRestart is the RPC handler that restarts etcd. On success it stores the
// pid of the agent's current command in *pid; on failure it logs and returns
// the error from restart.
func (a *Agent) RPCRestart(args struct{}, pid *int) error {
	plog.Printf("restart etcd")
	if err := a.restart(); err != nil {
		plog.Println("error restarting etcd", err)
		return err
	}
	*pid = a.cmd.Process.Pid
	return nil
}
// RPCCleanup is the RPC handler that runs the agent's cleanup. A failure is
// logged and returned to the caller.
func (a *Agent) RPCCleanup(args struct{}, reply *struct{}) error {
	plog.Printf("cleanup etcd")
	if err := a.cleanup(); err != nil {
		plog.Println("error cleaning up etcd", err)
		return err
	}
	return nil
}
// RPCTerminate is the RPC handler that runs the agent's terminate. A failure
// is only logged; the handler always reports success to the RPC caller.
func (a *Agent) RPCTerminate(args struct{}, reply *struct{}) error {
	plog.Printf("terminate etcd")
	if err := a.terminate(); err != nil {
		plog.Println("error terminating etcd", err)
	}
	return nil
}
// RPCDropPort is the RPC handler that calls dropPort for the given port. A
// failure is only logged; the handler always reports success to the RPC
// caller.
func (a *Agent) RPCDropPort(port int, reply *struct{}) error {
	plog.Printf("drop port %d", port)
	if err := a.dropPort(port); err != nil {
		plog.Println("error dropping port", err)
	}
	return nil
}
// RPCRecoverPort is the RPC handler that calls recoverPort for the given port.
// A failure is only logged; the handler always reports success to the RPC
// caller.
func (a *Agent) RPCRecoverPort(port int, reply *struct{}) error {
	plog.Printf("recover port %d", port)
	if err := a.recoverPort(port); err != nil {
		plog.Println("error recovering port", err)
	}
	return nil
}
// RPCSetLatency is the RPC handler that applies network latency. args must
// hold exactly two values: the latency in milliseconds followed by its +/-
// variation in milliseconds; any other length is rejected with an error. A
// failure from setLatency is only logged and not returned.
func (a *Agent) RPCSetLatency(args []int, reply *struct{}) error {
	if len(args) != 2 {
		return fmt.Errorf("SetLatency needs two args, got (%v)", args)
	}
	ms, rv := args[0], args[1]
	plog.Printf("set latency of %dms (+/- %dms)", ms, rv)
	if err := a.setLatency(ms, rv); err != nil {
		plog.Println("error setting latency", err)
	}
	return nil
}
// RPCRemoveLatency is the RPC handler that removes injected latency by calling
// setLatency(0, 0). A failure is only logged; the handler always reports
// success to the RPC caller.
func (a *Agent) RPCRemoveLatency(args struct{}, reply *struct{}) error {
	plog.Println("removing latency")
	if err := a.setLatency(0, 0); err != nil {
		// Include the underlying error, as the sibling handlers do, so the
		// log line says why the removal failed.
		plog.Println("error removing latency", err)
	}
	return nil
}
// RPCStatus is the RPC handler that copies the result of a.status() into
// *status. It always returns nil.
func (a *Agent) RPCStatus(args struct{}, status *client.Status) error {
*status = a.status()
return nil
}

View file

@ -0,0 +1,166 @@
// Copyright 2015 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package main
import (
"io/ioutil"
"log"
"net/rpc"
"os"
"testing"
"github.com/coreos/etcd/tools/functional-tester/etcd-agent/client"
)
// init creates one agent shared by every test in this file and starts its RPC
// server on ":9027", the address each test dials. Failing to create the agent
// panics.
func init() {
defaultAgent, err := newAgent(AgentConfig{EtcdPath: etcdPath, LogDir: "etcd.log"})
if err != nil {
log.Panic(err)
}
defaultAgent.serveRPC(":9027")
}
// TestRPCStart starts etcd through the RPC server with a fresh temporary data
// dir and checks that os.FindProcess accepts the returned pid. The process is
// terminated via Agent.RPCTerminate when the test returns.
func TestRPCStart(t *testing.T) {
c, err := rpc.DialHTTP("tcp", ":9027")
if err != nil {
t.Fatal(err)
}
dir, err := ioutil.TempDir(os.TempDir(), "etcd-agent")
if err != nil {
t.Fatal(err)
}
var pid int
err = c.Call("Agent.RPCStart", []string{"--data-dir", dir}, &pid)
if err != nil {
t.Fatal(err)
}
// The error from the deferred terminate call is not checked.
defer c.Call("Agent.RPCTerminate", struct{}{}, nil)
_, err = os.FindProcess(pid)
if err != nil {
t.Errorf("unexpected error %v when find process %d", err, pid)
}
}
// TestRPCRestart starts etcd, stops it, restarts it, and checks that the
// restart reports a different pid, that waiting on the old pid fails, and that
// os.FindProcess accepts the new pid.
func TestRPCRestart(t *testing.T) {
c, err := rpc.DialHTTP("tcp", ":9027")
if err != nil {
t.Fatal(err)
}
dir, err := ioutil.TempDir(os.TempDir(), "etcd-agent")
if err != nil {
t.Fatal(err)
}
var pid int
err = c.Call("Agent.RPCStart", []string{"--data-dir", dir}, &pid)
if err != nil {
t.Fatal(err)
}
// The error from the deferred terminate call is not checked.
defer c.Call("Agent.RPCTerminate", struct{}{}, nil)
err = c.Call("Agent.RPCStop", struct{}{}, nil)
if err != nil {
t.Fatal(err)
}
var npid int
err = c.Call("Agent.RPCRestart", struct{}{}, &npid)
if err != nil {
t.Fatal(err)
}
if npid == pid {
t.Errorf("pid = %v, want not equal to %d", npid, pid)
}
// NOTE(review): s is used below even when FindProcess reported an error
// (t.Errorf does not stop the test) — confirm that is acceptable.
s, err := os.FindProcess(pid)
if err != nil {
t.Errorf("unexpected error %v when find process %d", err, pid)
}
// Waiting on the old process is expected to fail.
_, err = s.Wait()
if err == nil {
t.Errorf("err = nil, want killed error")
}
_, err = os.FindProcess(npid)
if err != nil {
t.Errorf("unexpected error %v when find process %d", err, npid)
}
}
// TestRPCTerminate starts etcd with a fresh temporary data dir, terminates it
// through the RPC server and verifies that the data dir no longer exists.
func TestRPCTerminate(t *testing.T) {
	c, err := rpc.DialHTTP("tcp", ":9027")
	if err != nil {
		t.Fatal(err)
	}

	dir, err := ioutil.TempDir(os.TempDir(), "etcd-agent")
	if err != nil {
		t.Fatal(err)
	}

	var pid int
	err = c.Call("Agent.RPCStart", []string{"--data-dir", dir}, &pid)
	if err != nil {
		t.Fatal(err)
	}

	err = c.Call("Agent.RPCTerminate", struct{}{}, nil)
	if err != nil {
		t.Fatal(err)
	}

	// os.Stat must fail with a not-exist error. When the directory is still
	// there err is nil, so spell out the expectation instead of reporting a
	// bare "<nil>".
	if _, err := os.Stat(dir); !os.IsNotExist(err) {
		t.Fatalf("expected data dir %q to be removed after terminate, stat error = %v", dir, err)
	}
}
// TestRPCStatus checks that the agent reports stateTerminated before etcd is
// started and stateStarted after Agent.RPCStart, then terminates etcd.
func TestRPCStatus(t *testing.T) {
c, err := rpc.DialHTTP("tcp", ":9027")
if err != nil {
t.Fatal(err)
}
var s client.Status
err = c.Call("Agent.RPCStatus", struct{}{}, &s)
if err != nil {
t.Fatal(err)
}
// Expects the shared agent to be in the terminated state at this point.
if s.State != stateTerminated {
t.Errorf("state = %s, want %s", s.State, stateTerminated)
}
dir, err := ioutil.TempDir(os.TempDir(), "etcd-agent")
if err != nil {
t.Fatal(err)
}
var pid int
err = c.Call("Agent.RPCStart", []string{"--data-dir", dir}, &pid)
if err != nil {
t.Fatal(err)
}
err = c.Call("Agent.RPCStatus", struct{}{}, &s)
if err != nil {
t.Fatal(err)
}
if s.State != stateStarted {
t.Errorf("state = %s, want %s", s.State, stateStarted)
}
err = c.Call("Agent.RPCTerminate", struct{}{}, nil)
if err != nil {
t.Fatal(err)
}
}

View file

@ -0,0 +1,126 @@
// Copyright 2016 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package command
import (
"context"
"errors"
"fmt"
"github.com/coreos/etcd/clientv3/concurrency"
"github.com/spf13/cobra"
)
// NewElectionCommand builds the cobra "election" sub-command of the runner.
// It registers the --rounds and --total-client-connections flags and runs
// runElectionFunc.
func NewElectionCommand() *cobra.Command {
	electionCmd := &cobra.Command{
		Use:   "election",
		Short: "Performs election operation",
		Run:   runElectionFunc,
	}
	flags := electionCmd.Flags()
	flags.IntVar(&rounds, "rounds", 100, "number of rounds to run")
	flags.IntVar(&totalClientConnections, "total-client-connections", 10, "total number of client connections")
	return electionCmd
}
// runElectionFunc implements the "election" command. It creates
// totalClientConnections clients that repeatedly campaign on the "electors"
// election, wires their acquire/validate/release callbacks and hands them to
// doRounds. Positional arguments are rejected with ExitBadArgs.
func runElectionFunc(cmd *cobra.Command, args []string) {
if len(args) > 0 {
ExitWithError(ExitBadArgs, errors.New("election does not take any argument"))
}
rcs := make([]roundClient, totalClientConnections)
// validatec collects one token per client that has validated the leader;
// releasec hands out one token per client allowed to start a new round.
validatec, releasec := make(chan struct{}, len(rcs)), make(chan struct{}, len(rcs))
// Pre-fill releasec so every client can enter the first round.
for range rcs {
releasec <- struct{}{}
}
eps := endpointsFromFlag(cmd)
dialTimeout := dialTimeoutFromCmd(cmd)
for i := range rcs {
// v is the value this client proposes; it doubles as the client's identity.
v := fmt.Sprintf("%d", i)
observedLeader := ""
validateWaiters := 0
rcs[i].c = newClient(eps, dialTimeout)
var (
s *concurrency.Session
err error
)
// Retry session creation until it succeeds (no delay between attempts).
for {
s, err = concurrency.NewSession(rcs[i].c)
if err == nil {
break
}
}
e := concurrency.NewElection(s, "electors")
rcs[i].acquire = func() error {
// Wait for a round token before campaigning.
<-releasec
ctx, cancel := context.WithCancel(context.Background())
// Observe the election concurrently: the first observed leader is
// recorded, and if it is another client the campaign is cancelled.
go func() {
if ol, ok := <-e.Observe(ctx); ok {
observedLeader = string(ol.Kvs[0].Value)
if observedLeader != v {
cancel()
}
}
}()
err = e.Campaign(ctx, v)
if err == nil {
observedLeader = v
}
// The leader must later collect one validation token per client.
if observedLeader == v {
validateWaiters = len(rcs)
}
select {
case <-ctx.Done():
// Cancelled by the observer: another client leads, which counts
// as a successful acquire for a follower.
return nil
default:
cancel()
return err
}
}
rcs[i].validate = func() error {
// A failed Leader lookup is not treated as a validation failure.
if l, err := e.Leader(context.TODO()); err == nil && l != observedLeader {
return fmt.Errorf("expected leader %q, got %q", observedLeader, l)
}
validatec <- struct{}{}
return nil
}
rcs[i].release = func() error {
// Non-blocking drain: return an error (so the caller retries) until
// all outstanding validation tokens have arrived.
for validateWaiters > 0 {
select {
case <-validatec:
validateWaiters--
default:
return fmt.Errorf("waiting on followers")
}
}
if err := e.Resign(context.TODO()); err != nil {
return err
}
// Only the leader refills releasec, letting every client start the
// next round.
if observedLeader == v {
for range rcs {
releasec <- struct{}{}
}
}
observedLeader = ""
return nil
}
}
doRounds(rcs, rounds)
}

View file

@ -0,0 +1,42 @@
// Copyright 2015 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package command
import (
"fmt"
"os"
"github.com/coreos/etcd/client"
)
// Process exit codes used by the runner. ExitSuccess through ExitIO take the
// consecutive iota values 0 through 6; ExitBadArgs is fixed at 128.
const (
// http://tldp.org/LDP/abs/html/exitcodes.html
ExitSuccess = iota
ExitError
ExitBadConnection
ExitInvalidInput // for txn, watch command
ExitBadFeature // provided a valid flag with an unsupported value
ExitInterrupted
ExitIO
ExitBadArgs = 128
)
// ExitWithError prints err to stderr, followed by the detail text when err is
// a *client.ClusterError, and then exits the process with the given code.
func ExitWithError(code int, err error) {
	fmt.Fprintln(os.Stderr, "Error: ", err)
	cerr, isClusterErr := err.(*client.ClusterError)
	if isClusterErr {
		fmt.Fprintln(os.Stderr, cerr.Detail())
	}
	os.Exit(code)
}

View file

@ -0,0 +1,130 @@
// Copyright 2016 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package command
import (
"fmt"
"log"
"sync"
"time"
"github.com/coreos/etcd/clientv3"
"github.com/spf13/cobra"
)
// Package-level settings that the runner sub-commands bind to their flags.
var (
rounds int // total number of rounds the operation needs to be performed
totalClientConnections int // total number of client connections to be made with server
noOfPrefixes int // total number of prefixes which will be watched upon
watchPerPrefix int // number of watchers per prefix
reqRate int // put request per second
totalKeys int // total number of keys for operation
runningTime time.Duration // time for which operation should be performed
)
// GlobalFlags holds the flags that are defined globally
// and inherited by all sub-commands.
type GlobalFlags struct {
Endpoints []string
DialTimeout time.Duration
}
// roundClient is one participant driven by doRounds.
type roundClient struct {
// c is the etcd client; doRounds closes it after all rounds finish.
c *clientv3.Client
// progress counts the rounds this client has completed.
progress int
// acquire, validate and release are the per-round callbacks that doRounds
// invokes in that order.
acquire func() error
validate func() error
release func() error
}
// newClient creates a clientv3 client for the given endpoints using timeout
// as the dial timeout. Failure to create the client is fatal.
func newClient(eps []string, timeout time.Duration) *clientv3.Client {
	c, err := clientv3.New(clientv3.Config{
		Endpoints: eps,
		// timeout is already a time.Duration; scaling it by time.Second
		// again would turn a 2s timeout into roughly 63 years.
		DialTimeout: timeout,
	})
	if err != nil {
		log.Fatal(err)
	}
	return c
}
// doRounds runs every round client in its own goroutine until each has
// completed the requested number of rounds, reporting progress on stdout every
// 100 completed rounds. It panics if no round completes within a minute and
// closes all clients once every goroutine has finished.
func doRounds(rcs []roundClient, rounds int) {
// mu serializes the validate and release callbacks across clients.
var mu sync.Mutex
var wg sync.WaitGroup
wg.Add(len(rcs))
// finished is unbuffered: each completed round blocks until the reporting
// loop below receives it.
finished := make(chan struct{}, 0)
for i := range rcs {
go func(rc *roundClient) {
defer wg.Done()
for rc.progress < rounds {
for rc.acquire() != nil { /* spin */
}
mu.Lock()
// log.Fatal exits the process, so the lock is never released on
// this path.
if err := rc.validate(); err != nil {
log.Fatal(err)
}
mu.Unlock()
time.Sleep(10 * time.Millisecond)
rc.progress++
finished <- struct{}{}
mu.Lock()
// Retry release until it succeeds, dropping the lock between
// attempts so other clients can validate or release.
for rc.release() != nil {
mu.Unlock()
mu.Lock()
}
mu.Unlock()
}
}(&rcs[i])
}
start := time.Now()
// Expect exactly len(rcs)*rounds completion signals in total.
for i := 1; i < len(rcs)*rounds+1; i++ {
select {
case <-finished:
if i%100 == 0 {
fmt.Printf("finished %d, took %v\n", i, time.Since(start))
start = time.Now()
}
case <-time.After(time.Minute):
log.Panic("no progress after 1 minute!")
}
}
wg.Wait()
for _, rc := range rcs {
rc.c.Close()
}
}
// endpointsFromFlag reads the "endpoints" string-slice flag from cmd. A lookup
// error terminates the process through ExitWithError with ExitError.
func endpointsFromFlag(cmd *cobra.Command) []string {
	eps, err := cmd.Flags().GetStringSlice("endpoints")
	if err == nil {
		return eps
	}
	ExitWithError(ExitError, err)
	return eps
}
// dialTimeoutFromCmd reads the "dial-timeout" duration flag from cmd. A lookup
// error terminates the process through ExitWithError with ExitError.
func dialTimeoutFromCmd(cmd *cobra.Command) time.Duration {
	timeout, err := cmd.Flags().GetDuration("dial-timeout")
	if err == nil {
		return timeout
	}
	ExitWithError(ExitError, err)
	return timeout
}

View file

@ -0,0 +1,86 @@
// Copyright 2016 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package command
import (
"context"
"errors"
"fmt"
"log"
"time"
"github.com/coreos/etcd/clientv3"
"github.com/spf13/cobra"
"google.golang.org/grpc"
"google.golang.org/grpc/codes"
)
// NewLeaseRenewerCommand builds the cobra "lease-renewer" sub-command of the
// runner. It takes no command-specific flags and runs runLeaseRenewerFunc.
func NewLeaseRenewerCommand() *cobra.Command {
	return &cobra.Command{
		Use:   "lease-renewer",
		Short: "Performs lease renew operation",
		Run:   runLeaseRenewerFunc,
	}
}
// runLeaseRenewerFunc implements the "lease-renewer" command. It loops
// forever: grant a lease with a TTL of 5, then keep renewing it with
// KeepAliveOnce until the server reports the lease as not found, then grant a
// new one. Positional arguments are rejected with ExitBadArgs.
func runLeaseRenewerFunc(cmd *cobra.Command, args []string) {
if len(args) > 0 {
ExitWithError(ExitBadArgs, errors.New("lease-renewer does not take any argument"))
}
eps := endpointsFromFlag(cmd)
dialTimeout := dialTimeoutFromCmd(cmd)
c := newClient(eps, dialTimeout)
ctx := context.Background()
for {
var (
l *clientv3.LeaseGrantResponse
lk *clientv3.LeaseKeepAliveResponse
err error
)
// Retry the grant until it succeeds (no delay between attempts).
for {
l, err = c.Lease.Grant(ctx, 5)
if err == nil {
break
}
}
// expire is the local estimate of when the lease lapses, kept one second
// short of the TTL.
expire := time.Now().Add(time.Duration(l.TTL-1) * time.Second)
for {
lk, err = c.Lease.KeepAliveOnce(ctx, l.ID)
if grpc.Code(err) == codes.NotFound {
// The lease vanished before the local expiry estimate: report it
// and keep printing keep-alive results once a second, forever.
if time.Since(expire) < 0 {
log.Printf("bad renew! exceeded: %v", time.Since(expire))
for {
lk, err = c.Lease.KeepAliveOnce(ctx, l.ID)
fmt.Println(lk, err)
time.Sleep(time.Second)
}
}
log.Printf("lost lease %d, expire: %v\n", l.ID, expire)
break
}
// Any other error: retry the keep-alive immediately.
if err != nil {
continue
}
expire = time.Now().Add(time.Duration(lk.TTL-1) * time.Second)
log.Printf("renewed lease %d, expire: %v\n", lk.ID, expire)
// Sleep until two seconds before the renewed TTL runs out.
time.Sleep(time.Duration(lk.TTL-2) * time.Second)
}
}
}

View file

@ -0,0 +1,81 @@
// Copyright 2016 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package command
import (
"context"
"errors"
"fmt"
"github.com/coreos/etcd/clientv3/concurrency"
"github.com/spf13/cobra"
)
// NewLockRacerCommand builds the cobra "lock-racer" sub-command of the runner.
// It registers the --rounds and --total-client-connections flags and runs
// runRacerFunc.
func NewLockRacerCommand() *cobra.Command {
	racerCmd := &cobra.Command{
		Use:   "lock-racer",
		Short: "Performs lock race operation",
		Run:   runRacerFunc,
	}
	flags := racerCmd.Flags()
	flags.IntVar(&rounds, "rounds", 100, "number of rounds to run")
	flags.IntVar(&totalClientConnections, "total-client-connections", 10, "total number of client connections")
	return racerCmd
}
// runRacerFunc implements the "lock-racer" command. It creates
// totalClientConnections clients that contend for the "racers" mutex and hands
// them to doRounds. Positional arguments are rejected with ExitBadArgs.
func runRacerFunc(cmd *cobra.Command, args []string) {
if len(args) > 0 {
ExitWithError(ExitBadArgs, errors.New("lock-racer does not take any argument"))
}
rcs := make([]roundClient, totalClientConnections)
ctx := context.Background()
// cnt is shared by all clients and counts the current lock holders; validate
// fails if it is ever anything other than 1 after incrementing.
cnt := 0
eps := endpointsFromFlag(cmd)
dialTimeout := dialTimeoutFromCmd(cmd)
for i := range rcs {
var (
s *concurrency.Session
err error
)
rcs[i].c = newClient(eps, dialTimeout)
// Retry session creation until it succeeds (no delay between attempts).
for {
s, err = concurrency.NewSession(rcs[i].c)
if err == nil {
break
}
}
m := concurrency.NewMutex(s, "racers")
rcs[i].acquire = func() error { return m.Lock(ctx) }
rcs[i].validate = func() error {
if cnt++; cnt != 1 {
return fmt.Errorf("bad lock; count: %d", cnt)
}
return nil
}
rcs[i].release = func() error {
if err := m.Unlock(ctx); err != nil {
return err
}
// Reset the holder count only after the unlock succeeded.
cnt = 0
return nil
}
}
doRounds(rcs, rounds)
}

View file

@ -0,0 +1,202 @@
// Copyright 2016 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package command
import (
"context"
"errors"
"fmt"
"log"
"sync"
"time"
"github.com/coreos/etcd/clientv3"
"github.com/coreos/etcd/pkg/stringutil"
"github.com/spf13/cobra"
"golang.org/x/time/rate"
)
// NewWatchCommand builds the cobra "watcher" sub-command of the runner and
// registers its flags; the command runs runWatcherFunc.
//
// NOTE(review): --running-time is a Duration flag whose default is the bare
// number 60 (i.e. 60ns) while its help text speaks of seconds — confirm how
// the value is meant to be interpreted by the code that consumes runningTime.
func NewWatchCommand() *cobra.Command {
	watchCmd := &cobra.Command{
		Use:   "watcher",
		Short: "Performs watch operation",
		Run:   runWatcherFunc,
	}
	flags := watchCmd.Flags()
	flags.IntVar(&rounds, "rounds", 100, "number of rounds to run")
	flags.DurationVar(&runningTime, "running-time", 60, "number of seconds to run")
	flags.IntVar(&noOfPrefixes, "total-prefixes", 10, "total no of prefixes to use")
	flags.IntVar(&watchPerPrefix, "watch-per-prefix", 10, "number of watchers per prefix")
	flags.IntVar(&reqRate, "req-rate", 30, "rate at which put request will be performed")
	flags.IntVar(&totalKeys, "total-keys", 1000, "total number of keys to watch")
	return watchCmd
}
// runWatcherFunc implements the "watcher" command: it runs
// performWatchOnPrefixes once per round, printing the round number first.
// Positional arguments are rejected with ExitBadArgs.
func runWatcherFunc(cmd *cobra.Command, args []string) {
	if len(args) != 0 {
		ExitWithError(ExitBadArgs, errors.New("watcher does not take any argument"))
	}

	background := context.Background()
	round := 0
	for round < rounds {
		fmt.Println("round", round)
		performWatchOnPrefixes(background, cmd, round)
		round++
	}
}
// performWatchOnPrefixes runs one watcher round. It records the current store
// revision, starts a rate-limited goroutine that puts every key under every
// prefix, opens watchPerPrefix watchers per prefix starting at that revision,
// waits until each watcher has seen the expected keys, verifies the watch
// channels close after cancellation, and finally deletes the round's keys.
func performWatchOnPrefixes(ctx context.Context, cmd *cobra.Command, round int) {
keyPerPrefix := totalKeys / noOfPrefixes
prefixes := stringutil.UniqueStrings(5, noOfPrefixes)
keys := stringutil.RandomStrings(10, keyPerPrefix)
// All keys of this round share roundPrefix, so they can be removed with a
// single prefix delete at the end.
roundPrefix := fmt.Sprintf("%16x", round)
eps := endpointsFromFlag(cmd)
dialTimeout := dialTimeoutFromCmd(cmd)
var (
revision int64
wg sync.WaitGroup
gr *clientv3.GetResponse
err error
)
client := newClient(eps, dialTimeout)
defer client.Close()
// The key itself is irrelevant; only the response header's revision is used.
gr, err = getKey(ctx, client, "non-existent")
if err != nil {
log.Fatalf("failed to get the initial revision: %v", err)
}
revision = gr.Header.Revision
// NOTE(review): runningTime is itself a time.Duration and is multiplied by
// time.Second here — confirm the intended unit of the running-time flag.
ctxt, cancel := context.WithDeadline(ctx, time.Now().Add(runningTime*time.Second))
defer cancel()
// generate and put keys in cluster
limiter := rate.NewLimiter(rate.Limit(reqRate), reqRate)
// NOTE(review): this goroutine assigns the same err variable that the
// enclosing function assigns further down — confirm this sharing is benign.
go func() {
for _, key := range keys {
for _, prefix := range prefixes {
if err = limiter.Wait(ctxt); err != nil {
return
}
if err = putKeyAtMostOnce(ctxt, client, roundPrefix+"-"+prefix+"-"+key); err != nil {
log.Fatalf("failed to put key: %v", err)
return
}
}
}
}()
ctxc, cancelc := context.WithCancel(ctx)
wcs := make([]clientv3.WatchChan, 0)
rcs := make([]*clientv3.Client, 0)
for _, prefix := range prefixes {
for j := 0; j < watchPerPrefix; j++ {
// Each watcher gets its own client connection.
rc := newClient(eps, dialTimeout)
rcs = append(rcs, rc)
watchPrefix := roundPrefix + "-" + prefix
wc := rc.Watch(ctxc, watchPrefix, clientv3.WithPrefix(), clientv3.WithRev(revision))
wcs = append(wcs, wc)
wg.Add(1)
go func() {
defer wg.Done()
checkWatchResponse(wc, watchPrefix, keys)
}()
}
}
wg.Wait()
cancelc()
// verify all watch channels are closed
for e, wc := range wcs {
if _, ok := <-wc; ok {
log.Fatalf("expected wc to be closed, but received %v", e)
}
}
for _, rc := range rcs {
rc.Close()
}
if err = deletePrefix(ctx, client, roundPrefix); err != nil {
log.Fatalf("failed to clean up keys after test: %v", err)
}
}
// checkWatchResponse reads watch responses from wc until len(keys) events have
// been seen, requiring the i-th event's key to equal prefix + "-" + keys[i].
// Any mismatch, a channel closed early, or an event beyond the expected keys
// is fatal.
func checkWatchResponse(wc clientv3.WatchChan, prefix string, keys []string) {
	for n := 0; n < len(keys); {
		wr, more := <-wc
		if !more {
			// Report progress as received/expected.
			log.Fatalf("expect more keys (received %d/%d) for %s", n, len(keys), prefix)
		}
		for _, event := range wr.Events {
			// A response may carry more events than there are keys left;
			// fail with a clear message instead of indexing out of range.
			if n >= len(keys) {
				log.Fatalf("received unexpected extra key %q for prefix : %q\n", string(event.Kv.Key), prefix)
			}
			expectedKey := prefix + "-" + keys[n]
			receivedKey := string(event.Kv.Key)
			if expectedKey != receivedKey {
				log.Fatalf("expected key %q, got %q for prefix : %q\n", expectedKey, receivedKey, prefix)
			}
			n++
		}
	}
}
// putKeyAtMostOnce writes key (with the key itself as value) through a
// transaction guarded by the mod revision read beforehand, so the put can only
// take effect while the key is still at that revision. The transaction is
// retried until a commit returns without error or ctx is done, in which case
// ctx's error is returned.
func putKeyAtMostOnce(ctx context.Context, client *clientv3.Client, key string) error {
	resp, err := getKey(ctx, client, key)
	if err != nil {
		return err
	}

	// A key that does not exist yet is matched with mod revision 0.
	var modrev int64
	if len(resp.Kvs) != 0 {
		modrev = resp.Kvs[0].ModRevision
	}

	guard := clientv3.Compare(clientv3.ModRevision(key), "=", modrev)
	put := clientv3.OpPut(key, key)
	for {
		if ctxErr := ctx.Err(); ctxErr != nil {
			return ctxErr
		}
		if _, txnErr := client.Txn(ctx).If(guard).Then(put).Commit(); txnErr == nil {
			return nil
		}
	}
}
// deletePrefix deletes every key starting with key, retrying until a delete
// succeeds or ctx is done, in which case ctx's error is returned.
func deletePrefix(ctx context.Context, client *clientv3.Client, key string) error {
	for {
		if ctxErr := ctx.Err(); ctxErr != nil {
			return ctxErr
		}
		_, err := client.Delete(ctx, key, clientv3.WithPrefix())
		if err == nil {
			return nil
		}
	}
}
// getKey fetches key, retrying until a get succeeds or ctx is done, in which
// case it returns nil and ctx's error.
func getKey(ctx context.Context, client *clientv3.Client, key string) (*clientv3.GetResponse, error) {
	for {
		if ctxErr := ctx.Err(); ctxErr != nil {
			return nil, ctxErr
		}
		resp, err := client.Get(ctx, key)
		if err != nil {
			continue
		}
		return resp, nil
	}
}

View file

@ -0,0 +1,16 @@
// Copyright 2016 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// etcd-runner is a program for testing etcd clientv3 features against a fault injected cluster.
package main

View file

@ -0,0 +1,174 @@
// Copyright 2015 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// copied from https://github.com/coreos/rkt/blob/master/rkt/help.go
package main
import (
"bytes"
"fmt"
"io"
"os"
"strings"
"text/tabwriter"
"text/template"
"github.com/coreos/etcd/version"
"github.com/spf13/cobra"
"github.com/spf13/pflag"
)
var (
// commandUsageTemplate is the parsed usage template; it is assigned in init.
commandUsageTemplate *template.Template
// templFuncs are the helper functions made available to the usage template.
templFuncs = template.FuncMap{
// descToLines splits a description into its individual lines.
"descToLines": func(s string) []string {
// trim leading/trailing whitespace and split into slice of lines
return strings.Split(strings.Trim(s, "\n\t "), "\n")
},
// cmdName returns cmd's name prefixed by the names of its ancestors,
// walking up until a command whose parent is named like startCmd.
"cmdName": func(cmd *cobra.Command, startCmd *cobra.Command) string {
parts := []string{cmd.Name()}
for cmd.HasParent() && cmd.Parent().Name() != startCmd.Name() {
cmd = cmd.Parent()
parts = append([]string{cmd.Name()}, parts...)
}
return strings.Join(parts, " ")
},
}
)
// init parses the command usage template into commandUsageTemplate. In the
// template source a backslash at the end of a line marks a line continuation;
// those "\\\n" sequences are stripped before parsing. A template that fails to
// parse panics via template.Must.
func init() {
commandUsage := `
{{ $cmd := .Cmd }}\
{{ $cmdname := cmdName .Cmd .Cmd.Root }}\
NAME:
{{ if not .Cmd.HasParent }}\
{{printf "\t%s - %s" .Cmd.Name .Cmd.Short}}
{{else}}\
{{printf "\t%s - %s" $cmdname .Cmd.Short}}
{{end}}\
USAGE:
{{printf "\t%s" .Cmd.UseLine}}
{{ if not .Cmd.HasParent }}\
VERSION:
{{printf "\t%s" .Version}}
{{end}}\
{{if .Cmd.HasSubCommands}}\
API VERSION:
{{printf "\t%s" .APIVersion}}
{{end}}\
{{if .Cmd.HasSubCommands}}\
COMMANDS:
{{range .SubCommands}}\
{{ $cmdname := cmdName . $cmd }}\
{{ if .Runnable }}\
{{printf "\t%s\t%s" $cmdname .Short}}
{{end}}\
{{end}}\
{{end}}\
{{ if .Cmd.Long }}\
DESCRIPTION:
{{range $line := descToLines .Cmd.Long}}{{printf "\t%s" $line}}
{{end}}\
{{end}}\
{{if .Cmd.HasLocalFlags}}\
OPTIONS:
{{.LocalFlags}}\
{{end}}\
{{if .Cmd.HasInheritedFlags}}\
GLOBAL OPTIONS:
{{.GlobalFlags}}\
{{end}}
`[1:]
// The [1:] above drops the leading newline of the raw string literal.
commandUsageTemplate = template.Must(template.New("command_usage").Funcs(templFuncs).Parse(strings.Replace(commandUsage, "\\\n", "", -1)))
}
// etcdFlagUsages renders one usage line per non-deprecated flag in flagSet:
// optional shorthand, long name, default value (quoted for string flags and
// bracketed when the flag has a no-option default) and, after a tab, the
// usage text.
func etcdFlagUsages(flagSet *pflag.FlagSet) string {
	out := new(bytes.Buffer)

	flagSet.VisitAll(func(f *pflag.Flag) {
		if len(f.Deprecated) > 0 {
			return
		}

		// Flags without a shorthand still pass an (empty) shorthand argument.
		layout := " %s --%s"
		if len(f.Shorthand) > 0 {
			layout = " -%s, --%s"
		}

		optionalValue := len(f.NoOptDefVal) > 0
		if optionalValue {
			layout += "["
		}
		if f.Value.Type() == "string" {
			// put quotes on the value
			layout += "=%q"
		} else {
			layout += "=%s"
		}
		if optionalValue {
			layout += "]"
		}
		layout += "\t%s\n"

		fmt.Fprintf(out, layout, f.Shorthand, f.Name, f.DefValue, f.Usage)
	})

	return out.String()
}
// getSubCommands returns every descendant of cmd in depth-first order: each
// direct child is followed immediately by its own descendants.
func getSubCommands(cmd *cobra.Command) []*cobra.Command {
	var all []*cobra.Command
	for _, child := range cmd.Commands() {
		all = append(all, child)
		descendants := getSubCommands(child)
		all = append(all, descendants...)
	}
	return all
}
// usageFunc renders the usage text for cmd to stdout through a tab-aligned
// writer. It returns the template execution error if there is one, otherwise
// the error from flushing the writer.
func usageFunc(cmd *cobra.Command) error {
	subCommands := getSubCommands(cmd)
	tabOut := getTabOutWithWriter(os.Stdout)
	execErr := commandUsageTemplate.Execute(tabOut, struct {
		Cmd         *cobra.Command
		LocalFlags  string
		GlobalFlags string
		SubCommands []*cobra.Command
		Version     string
		APIVersion  string
	}{
		cmd,
		etcdFlagUsages(cmd.LocalFlags()),
		etcdFlagUsages(cmd.InheritedFlags()),
		subCommands,
		version.Version,
		version.APIVersion,
	})
	// Always flush so that whatever was rendered before a failure is still
	// shown, then surface the first error instead of dropping it.
	flushErr := tabOut.Flush()
	if execErr != nil {
		return execErr
	}
	return flushErr
}
// getTabOutWithWriter wraps writer in a tabwriter configured with minimum cell
// width 0, tab width 8, padding 1, tab as the padding character and no flags.
func getTabOutWithWriter(writer io.Writer) *tabwriter.Writer {
	return tabwriter.NewWriter(writer, 0, 8, 1, '\t', 0)
}

View file

@ -0,0 +1,75 @@
// Copyright 2016 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// etcd-runner is a command line application that performs tests on etcd.
package main
import (
"log"
"time"
"github.com/coreos/etcd/tools/functional-tester/etcd-runner/command"
"github.com/spf13/cobra"
)
// Command-line identity of the runner and the default value of the
// --dial-timeout flag.
const (
cliName = "etcd-runner"
cliDescription = "Stress tests using clientv3 functionality.."
defaultDialTimeout = 2 * time.Second
)
var (
// globalFlags receives the values of the persistent --endpoints and
// --dial-timeout flags registered in init.
globalFlags = command.GlobalFlags{}
)
var (
// rootCmd is the top-level etcd-runner command; the sub-commands are
// attached to it in init.
rootCmd = &cobra.Command{
Use: cliName,
Short: cliDescription,
SuggestFor: []string{"etcd-runner"},
}
)
// init configures log timestamps with microsecond resolution, registers the
// persistent --endpoints and --dial-timeout flags on the root command and
// attaches the election, lease-renewer, lock-racer and watcher sub-commands.
func init() {
	log.SetFlags(log.Lmicroseconds)

	persistent := rootCmd.PersistentFlags()
	persistent.StringSliceVar(&globalFlags.Endpoints, "endpoints", []string{"127.0.0.1:2379"}, "gRPC endpoints")
	persistent.DurationVar(&globalFlags.DialTimeout, "dial-timeout", defaultDialTimeout, "dial timeout for client connections")

	subCommands := []*cobra.Command{
		command.NewElectionCommand(),
		command.NewLeaseRenewerCommand(),
		command.NewLockRacerCommand(),
		command.NewWatchCommand(),
	}
	rootCmd.AddCommand(subCommands...)
}
// init turns on cobra's prefix matching for command names.
func init() {
cobra.EnablePrefixMatching = true
}
// Start installs the custom usage function, makes the help output identical
// to the usage output and executes the root command. An execution error
// terminates the process through command.ExitWithError with ExitError.
func Start() {
	rootCmd.SetUsageFunc(usageFunc)
	// Make help just show the usage
	rootCmd.SetHelpTemplate(`{{.UsageString}}`)

	err := rootCmd.Execute()
	if err == nil {
		return
	}
	command.ExitWithError(command.ExitError, err)
}
// main is the program entry point; it delegates to Start.
func main() {
Start()
}

View file

@ -0,0 +1,248 @@
// Copyright 2016 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package main
import (
"fmt"
"time"
"google.golang.org/grpc"
"github.com/coreos/etcd/clientv3"
"github.com/coreos/etcd/etcdserver/api/v3rpc/rpctypes"
pb "github.com/coreos/etcd/etcdserver/etcdserverpb"
"golang.org/x/net/context"
)
const (
// retries is the number of attempts made by the retry loops in
// checkRevAndHashes and checkShortLivedLease before giving up.
retries = 7
)
// Checker is the interface implemented by every consistency check in this
// package (hash, lease, composite and no-op checkers).
type Checker interface {
// Check returns an error if the system fails a consistency check.
Check() error
}
// hashAndRevGetter provides the revision and hash values that hashChecker
// compares. The cluster type implements it and keys both maps by member
// client URL.
type hashAndRevGetter interface {
getRevisionHash() (revs map[string]int64, hashes map[string]int64, err error)
}
// hashChecker is a Checker that compares the revisions and hashes obtained
// from hrg.
type hashChecker struct {
hrg hashAndRevGetter
}
// newHashChecker wraps hrg in a hashChecker and returns it as a Checker.
func newHashChecker(hrg hashAndRevGetter) Checker {
	return &hashChecker{hrg: hrg}
}
// leaseCheckerTimeout bounds the context created by leaseChecker.check and
// leaseChecker.checkShortLivedLeases.
const leaseCheckerTimeout = 10 * time.Second
// checkRevAndHashes fetches revisions and hashes up to `retries` times, one
// second apart, and returns nil as soon as getSameValue reports true for both
// maps. If every attempt fails it returns an error describing either the last
// retrieval error or the last mismatching revisions and hashes.
func (hc *hashChecker) checkRevAndHashes() (err error) {
	var revs, hashes map[string]int64

	// retry in case of a transient failure or an etcd cluster that has not stabilized yet.
	for attempt := 0; attempt < retries; attempt++ {
		if revs, hashes, err = hc.hrg.getRevisionHash(); err != nil {
			plog.Warningf("retry %d. failed to retrieve revison and hash (%v)", attempt, err)
			time.Sleep(time.Second)
			continue
		}

		revsAgree, hashesAgree := getSameValue(revs), getSameValue(hashes)
		if revsAgree && hashesAgree {
			return nil
		}
		plog.Warningf("retry %d. etcd cluster is not stable: [revisions: %v] and [hashes: %v]", attempt, revs, hashes)
		time.Sleep(time.Second)
	}

	if err == nil {
		return fmt.Errorf("etcd cluster is not stable: [revisions: %v] and [hashes: %v]", revs, hashes)
	}
	return fmt.Errorf("failed revision and hash check (%v)", err)
}
// Check implements Checker by running checkRevAndHashes.
func (hc *hashChecker) Check() error {
return hc.checkRevAndHashes()
}
// leaseChecker is a Checker that verifies the lease state recorded by a
// leaseStresser against the server.
type leaseChecker struct {
// endpoint is only used in log messages; Check dials ls.endpoint.
endpoint string
// ls holds the revoked, alive and short-lived lease sets to verify.
ls *leaseStresser
// leaseClient and kvc are replaced on every call to Check.
leaseClient pb.LeaseClient
kvc pb.KVClient
}
// Check dials the lease stresser's endpoint and verifies, in order, that the
// revoked leases are expired, that the alive leases are not, and that the
// short-lived leases eventually expire. It returns the first error found.
// The connection is closed before Check returns.
func (lc *leaseChecker) Check() error {
	// The backoff cap is a time.Duration: a bare 1 means one nanosecond,
	// which lets gRPC redial a broken connection with no pause at all.
	conn, err := grpc.Dial(lc.ls.endpoint, grpc.WithInsecure(), grpc.WithBackoffMaxDelay(time.Second))
	if err != nil {
		return fmt.Errorf("%v (%s)", err, lc.ls.endpoint)
	}
	defer conn.Close()

	lc.kvc = pb.NewKVClient(conn)
	lc.leaseClient = pb.NewLeaseClient(conn)

	if err := lc.check(true, lc.ls.revokedLeases.leases); err != nil {
		return err
	}
	if err := lc.check(false, lc.ls.aliveLeases.leases); err != nil {
		return err
	}
	return lc.checkShortLivedLeases()
}
// checkShortLivedLeases ensures leases expire. Each short-lived lease is
// checked in its own goroutine under one shared timeout; every failure is
// collected and the set is combined by errsToError.
func (lc *leaseChecker) checkShortLivedLeases() error {
	ctx, cancel := context.WithTimeout(context.Background(), leaseCheckerTimeout)
	defer cancel()

	results := make(chan error)
	for id := range lc.ls.shortLivedLeases.leases {
		go func(leaseID int64) {
			results <- lc.checkShortLivedLease(ctx, leaseID)
		}(id)
	}

	// receive exactly one result per lease started above
	var failures []error
	for range lc.ls.shortLivedLeases.leases {
		err := <-results
		if err == nil {
			continue
		}
		failures = append(failures, err)
	}
	return errsToError(failures)
}
// checkShortLivedLease waits for one short-lived lease to go away, making at
// most `retries` attempts. It returns nil as soon as the server reports the
// lease as not found, or as soon as checkLease succeeds after the wait;
// otherwise it returns the error of the last attempt.
func (lc *leaseChecker) checkShortLivedLease(ctx context.Context, leaseID int64) (err error) {
// retry in case of transient failure or lease is expired but not yet revoked due to the fact that etcd cluster didn't have enough time to delete it.
var resp *pb.LeaseTimeToLiveResponse
for i := 0; i < retries; i++ {
resp, err = lc.getLeaseByID(ctx, leaseID)
// the lease is already gone: nothing left to verify
if rpctypes.Error(err) == rpctypes.ErrLeaseNotFound {
return nil
}
if err != nil {
plog.Debugf("retry %d. failed to retrieve lease %v error (%v)", i, leaseID, err)
// any other error uses up an attempt and is retried without sleeping
continue
}
if resp.TTL > 0 {
// the lease still has time left: sleep for the reported TTL (in seconds)
plog.Debugf("lease %v is not expired. sleep for %d until it expires.", leaseID, resp.TTL)
time.Sleep(time.Duration(resp.TTL) * time.Second)
} else {
plog.Debugf("retry %d. lease %v is expired but not yet revoked", i, leaseID)
time.Sleep(time.Second)
}
// NOTE(review): checkLease is called with expired=false even though the
// lease is expected to be gone after the sleep — confirm this is intended.
if err = lc.checkLease(ctx, false, leaseID); err != nil {
continue
}
return nil
}
return err
}
// checkLease verifies a single lease. It first asks whether the keys attached
// to the lease are gone, then whether the lease itself is gone, and fails if
// the two answers disagree or if the lease state differs from `expired`.
// Lookup errors are logged and returned unchanged.
func (lc *leaseChecker) checkLease(ctx context.Context, expired bool, leaseID int64) error {
	keysGone, kerr := lc.hasKeysAttachedToLeaseExpired(ctx, leaseID)
	if kerr != nil {
		plog.Errorf("hasKeysAttachedToLeaseExpired error %v (endpoint %q)", kerr, lc.endpoint)
		return kerr
	}

	leaseGone, lerr := lc.hasLeaseExpired(ctx, leaseID)
	if lerr != nil {
		plog.Errorf("hasLeaseExpired error %v (endpoint %q)", lerr, lc.endpoint)
		return lerr
	}

	switch {
	case leaseGone != keysGone:
		return fmt.Errorf("lease %v expiration mismatch (lease expired=%v, keys expired=%v)", leaseID, leaseGone, keysGone)
	case leaseGone != expired:
		return fmt.Errorf("lease %v expected expired=%v, got %v", leaseID, expired, leaseGone)
	}
	return nil
}
// check runs checkLease on every lease ID in leases under a single
// leaseCheckerTimeout-bounded context and stops at the first error.
func (lc *leaseChecker) check(expired bool, leases map[int64]time.Time) error {
	ctx, cancel := context.WithTimeout(context.Background(), leaseCheckerTimeout)
	defer cancel()

	var err error
	for id := range leases {
		if err = lc.checkLease(ctx, expired, id); err != nil {
			break
		}
	}
	return err
}
// getLeaseByID issues a LeaseTimeToLive request for leaseID, asking for the
// attached keys as well, with gRPC fail-fast disabled.
func (lc *leaseChecker) getLeaseByID(ctx context.Context, leaseID int64) (*pb.LeaseTimeToLiveResponse, error) {
	return lc.leaseClient.LeaseTimeToLive(
		ctx,
		&pb.LeaseTimeToLiveRequest{ID: leaseID, Keys: true},
		grpc.FailFast(false),
	)
}
// hasLeaseExpired reports whether the lease no longer exists on the server.
// A successful lookup means "not expired"; ErrLeaseNotFound means "expired".
// Any other error is logged and the lookup is repeated until ctx is done, at
// which point ctx.Err() is returned.
func (lc *leaseChecker) hasLeaseExpired(ctx context.Context, leaseID int64) (bool, error) {
	// keep retrying until lease's state is known or ctx is being canceled
	for ctx.Err() == nil {
		resp, err := lc.getLeaseByID(ctx, leaseID)
		switch {
		case err == nil:
			return false, nil
		case rpctypes.Error(err) == rpctypes.ErrLeaseNotFound:
			return true, nil
		}
		plog.Warningf("hasLeaseExpired %v resp %v error %v (endpoint %q)", leaseID, resp, err, lc.endpoint)
	}
	return false, ctx.Err()
}
// hasKeysAttachedToLeaseExpired reports whether no keys remain for the lease.
// Keys attached to a lease are named "<leaseID>_<idx>", where idx is the order
// of key creation, so a range query over the "<leaseID>" prefix finds them;
// an empty result means the attached keys have been deleted.
func (lc *leaseChecker) hasKeysAttachedToLeaseExpired(ctx context.Context, leaseID int64) (bool, error) {
	req := &pb.RangeRequest{
		Key:      []byte(fmt.Sprintf("%d", leaseID)),
		RangeEnd: []byte(clientv3.GetPrefixRangeEnd(fmt.Sprintf("%d", leaseID))),
	}
	resp, err := lc.kvc.Range(ctx, req, grpc.FailFast(false))
	if err == nil {
		return len(resp.Kvs) == 0, nil
	}
	plog.Errorf("retrieving keys attached to lease %v error %v (endpoint %q)", leaseID, err, lc.endpoint)
	return false, err
}
// compositeChecker implements a checker that runs a slice of Checkers concurrently
// and reports the combined errors of all of them.
type compositeChecker struct{ checkers []Checker }
// newCompositeChecker returns a Checker that runs all of checkers together.
func newCompositeChecker(checkers []Checker) Checker {
	return &compositeChecker{checkers: checkers}
}
// Check starts every wrapped checker in its own goroutine, waits for all of
// them, and returns their non-nil errors combined by errsToError.
func (cchecker *compositeChecker) Check() error {
	results := make(chan error)
	for i := range cchecker.checkers {
		go func(chk Checker) {
			results <- chk.Check()
		}(cchecker.checkers[i])
	}

	var failures []error
	for range cchecker.checkers {
		err := <-results
		if err == nil {
			continue
		}
		failures = append(failures, err)
	}
	return errsToError(failures)
}
// noChecker is a Checker that never fails.
type noChecker struct{}
// newNoChecker returns a Checker whose Check always succeeds.
func newNoChecker() Checker { return &noChecker{} }
// Check implements Checker and always returns nil.
func (nc *noChecker) Check() error { return nil }

View file

@ -0,0 +1,259 @@
// Copyright 2015 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package main
import (
"fmt"
"math/rand"
"net"
"strings"
"time"
"golang.org/x/net/context"
pb "github.com/coreos/etcd/etcdserver/etcdserverpb"
"github.com/coreos/etcd/tools/functional-tester/etcd-agent/client"
"google.golang.org/grpc"
)
// agentConfig holds information needed to interact/configure an agent and its etcd process
type agentConfig struct {
// endpoint is the agent address as "host:port"; bootstrap reuses its host
// part for the member's client, peer and failpoint URLs.
endpoint string
clientPort int
peerPort int
failpointPort int
// datadir is passed to the member as --data-dir.
datadir string
}
// cluster is the set of etcd members under test together with the agent
// configuration they are created from.
type cluster struct {
agents []agentConfig
// Size and Members are set by bootstrap once every member has started.
Size int
Members []*member
}
// ClusterStatus is the per-agent status report built by cluster.Status.
type ClusterStatus struct {
// AgentStatuses is keyed by the member's agent endpoint.
AgentStatuses map[string]client.Status
}
// bootstrap creates one member per configured agent and starts them all as a
// new cluster. It first builds every member (agent client, name, URLs) so the
// full --initial-cluster string is known, then starts the members one by one
// with a shared random cluster token. If a start fails, the members started
// so far are terminated and the error is returned; c.Size and c.Members are
// only updated when every member has started.
func (c *cluster) bootstrap() error {
size := len(c.agents)
members := make([]*member, size)
memberNameURLs := make([]string, size)
for i, a := range c.agents {
agent, err := client.NewAgent(a.endpoint)
if err != nil {
return err
}
// the member URLs reuse the host part of the agent endpoint
host, _, err := net.SplitHostPort(a.endpoint)
if err != nil {
return err
}
members[i] = &member{
Agent: agent,
Endpoint: a.endpoint,
Name: fmt.Sprintf("etcd-%d", i),
ClientURL: fmt.Sprintf("http://%s:%d", host, a.clientPort),
PeerURL: fmt.Sprintf("http://%s:%d", host, a.peerPort),
FailpointURL: fmt.Sprintf("http://%s:%d", host, a.failpointPort),
}
memberNameURLs[i] = members[i].ClusterEntry()
}
clusterStr := strings.Join(memberNameURLs, ",")
token := fmt.Sprint(rand.Int())
for i, m := range members {
flags := append(
m.Flags(),
"--data-dir", c.agents[i].datadir,
"--initial-cluster-token", token,
"--initial-cluster", clusterStr)
if _, err := m.Agent.Start(flags...); err != nil {
// cleanup: terminate the members that were already started
for _, m := range members[:i] {
m.Agent.Terminate()
}
return err
}
}
c.Size = size
c.Members = members
return nil
}
// Reset bootstraps the cluster again from its agent configuration.
func (c *cluster) Reset() error { return c.bootstrap() }
// WaitHealth waits until a health key can be written through every member.
// It tries up to 60 times, one second apart, and returns nil on the first
// round in which all members succeed; otherwise it returns the error of the
// last failed round.
func (c *cluster) WaitHealth() error {
var err error
// wait 60s to check cluster health.
// TODO: set it to a reasonable value. It is set that high because
// follower may use long time to catch up the leader when reboot under
// reasonable workload (https://github.com/coreos/etcd/issues/2698)
for i := 0; i < 60; i++ {
for _, m := range c.Members {
// err is overwritten by every member, so after the loop it is nil
// only if no member failed (the loop stops at the first failure)
if err = m.SetHealthKeyV3(); err != nil {
break
}
}
if err == nil {
return nil
}
plog.Warningf("#%d setHealthKey error (%v)", i, err)
time.Sleep(time.Second)
}
return err
}
// GetLeader returns the index of leader and error if any. Members are asked
// in order; the scan stops at the first member that is the leader or whose
// query fails, and that member's index is returned in both cases.
func (c *cluster) GetLeader() (int, error) {
	for idx := range c.Members {
		leader, err := c.Members[idx].IsLeader()
		if err != nil {
			return idx, err
		}
		if leader {
			return idx, nil
		}
	}
	return 0, fmt.Errorf("no leader found")
}
// Cleanup asks every member's agent to clean up. All members are attempted
// even if some fail; the error of the last failing member is returned.
func (c *cluster) Cleanup() error {
	var lasterr error
	for i := range c.Members {
		err := c.Members[i].Agent.Cleanup()
		if err == nil {
			continue
		}
		lasterr = err
	}
	return lasterr
}
// Terminate asks every member's agent to terminate, ignoring the results.
func (c *cluster) Terminate() {
	for i := range c.Members {
		c.Members[i].Agent.Terminate()
	}
}
// Status queries every member's agent and returns the statuses keyed by the
// agent endpoint. A member whose agent cannot be queried is reported with
// State "unknown" and the failure is logged.
func (c *cluster) Status() ClusterStatus {
	cs := ClusterStatus{
		AgentStatuses: make(map[string]client.Status),
	}

	for _, m := range c.Members {
		s, err := m.Agent.Status()
		// TODO: add a.Desc() as a key of the map
		desc := m.Endpoint
		if err != nil {
			cs.AgentStatuses[desc] = client.Status{State: "unknown"}
			plog.Printf("failed to get the status of agent [%s]", desc)
			// skip the assignment below, which would otherwise replace
			// the "unknown" entry with the status returned on failure
			continue
		}
		cs.AgentStatuses[desc] = s
	}
	return cs
}
// maxRev returns the maximum revision found on the cluster. Every member is
// queried concurrently under a shared one-second timeout. If any query fails,
// one of the errors is returned together with the largest revision received.
func (c *cluster) maxRev() (rev int64, err error) {
	ctx, cancel := context.WithTimeout(context.TODO(), time.Second)
	defer cancel()

	// both channels are buffered so no goroutine blocks on send
	revc := make(chan int64, len(c.Members))
	errc := make(chan error, len(c.Members))
	for _, mem := range c.Members {
		go func(m *member) {
			mrev, merr := m.Rev(ctx)
			revc <- mrev
			errc <- merr
		}(mem)
	}

	for pending := len(c.Members); pending > 0; pending-- {
		if merr := <-errc; merr != nil {
			err = merr
		}
		if mrev := <-revc; mrev > rev {
			rev = mrev
		}
	}
	return rev, err
}
// getRevisionHash collects the revision and hash of every member, keyed by
// the member's client URL. The first member that fails aborts the collection
// and yields nil maps with that error.
func (c *cluster) getRevisionHash() (map[string]int64, map[string]int64, error) {
	var (
		revs   = make(map[string]int64)
		hashes = make(map[string]int64)
	)
	for i := range c.Members {
		mem := c.Members[i]
		rev, hash, err := mem.RevHash()
		if err != nil {
			return nil, nil, err
		}
		revs[mem.ClientURL], hashes[mem.ClientURL] = rev, hash
	}
	return revs, hashes, nil
}
// compactKV sends a physical compaction request for revision rev to every
// member, each bounded by timeout. A non-positive rev is a no-op. All members
// are attempted even if some fail; the last dial or compaction error (if any)
// is returned.
func (c *cluster) compactKV(rev int64, timeout time.Duration) (err error) {
if rev <= 0 {
return nil
}
for i, m := range c.Members {
u := m.ClientURL
conn, derr := m.dialGRPC()
if derr != nil {
plog.Printf("[compact kv #%d] dial error %v (endpoint %s)", i, derr, u)
err = derr
continue
}
kvc := pb.NewKVClient(conn)
ctx, cancel := context.WithTimeout(context.Background(), timeout)
plog.Printf("[compact kv #%d] starting (endpoint %s)", i, u)
_, cerr := kvc.Compact(ctx, &pb.CompactionRequest{Revision: rev, Physical: true}, grpc.FailFast(false))
// release the per-member context and connection before inspecting the result
cancel()
conn.Close()
succeed := true
if cerr != nil {
// "already compacted" is tolerated for every member except the first one
// (index 0); it is matched on the error text.
if strings.Contains(cerr.Error(), "required revision has been compacted") && i > 0 {
plog.Printf("[compact kv #%d] already compacted (endpoint %s)", i, u)
} else {
plog.Warningf("[compact kv #%d] error %v (endpoint %s)", i, cerr, u)
err = cerr
succeed = false
}
}
if succeed {
plog.Printf("[compact kv #%d] done (endpoint %s)", i, u)
}
}
return err
}
// checkCompact runs CheckCompact(rev) on every member and returns the first
// error. A rev of zero skips the check.
func (c *cluster) checkCompact(rev int64) error {
	if rev == 0 {
		return nil
	}
	var err error
	for i := range c.Members {
		if err = c.Members[i].CheckCompact(rev); err != nil {
			break
		}
	}
	return err
}
// defrag runs Defrag on every member in order and returns the first error.
func (c *cluster) defrag() error {
	var err error
	for i := range c.Members {
		if err = c.Members[i].Defrag(); err != nil {
			break
		}
	}
	return err
}

View file

@ -0,0 +1,16 @@
// Copyright 2016 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// etcd-tester is a single controller for all etcd-agents to manage an etcd cluster and simulate failures.
package main

View file

@ -0,0 +1,155 @@
// Copyright 2016 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package main
import (
"fmt"
"io/ioutil"
"net/http"
"strings"
"sync"
"time"
)
// failpointStats records how often members had to be restarted while
// recovering from a failpoint.
type failpointStats struct {
// crashes counts the number of crashes for a failpoint
crashes map[string]int
// mu protects crashes
mu sync.Mutex
}
// fpStats is the package-wide instance; its crashes map is allocated by
// failpointFailures and incremented by the functions built in
// makeRecoverFailpoint.
var fpStats failpointStats
// failpointFailures lists the failpoints exposed by the first member and
// returns the failures built for each of them. Failpoints whose name contains
// "Snap" are wrapped in failureUntilSnapshot, all others in a 3-second
// failureDelay. On success the crash counters in fpStats are reset.
func failpointFailures(c *cluster) (ret []failure, err error) {
	fps, err := failpointPaths(c.Members[0].FailpointURL)
	if err != nil {
		return nil, err
	}

	// create failure objects for all failpoints
	for _, fp := range fps {
		if fp == "" {
			continue
		}
		wrapped := failuresFromFailpoint(fp)
		snapshotRelated := strings.Contains(fp, "Snap")
		// wrap in delays so failpoint has time to trigger
		for i := range wrapped {
			inner := wrapped[i]
			if snapshotRelated {
				// hack to trigger snapshot failpoints
				wrapped[i] = &failureUntilSnapshot{inner}
				continue
			}
			wrapped[i] = &failureDelay{inner, 3 * time.Second}
		}
		ret = append(ret, wrapped...)
	}

	fpStats.crashes = make(map[string]int)
	return ret, nil
}
// failpointPaths fetches endpoint with an HTTP GET and returns, for every
// line of the response body, the text in front of the first "=" (or the whole
// line when it has none). Empty lines yield empty entries.
func failpointPaths(endpoint string) ([]string, error) {
	resp, err := http.Get(endpoint)
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()

	raw, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		return nil, err
	}

	var names []string
	for _, line := range strings.Split(string(raw), "\n") {
		name := line
		if eq := strings.Index(line, "="); eq >= 0 {
			name = line[:eq]
		}
		names = append(names, name)
	}
	return names, nil
}
// failuresFromFailpoint builds the four failures for failpoint fp: panic on
// one member, on all members, on a majority, and on the leader. All four
// share the same inject and recover functions.
func failuresFromFailpoint(fp string) []failure {
	var (
		inject = makeInjectFailpoint(fp, `panic("etcd-tester")`)
		recov  = makeRecoverFailpoint(fp)
		prefix = "failpoint " + fp + " panic "
	)

	one := &failureOne{
		description:   description(prefix + "one"),
		injectMember:  inject,
		recoverMember: recov,
	}
	all := &failureAll{
		description:   description(prefix + "all"),
		injectMember:  inject,
		recoverMember: recov,
	}
	majority := &failureMajority{
		description:   description(prefix + "majority"),
		injectMember:  inject,
		recoverMember: recov,
	}
	leader := &failureLeader{
		failureByFunc: failureByFunc{
			description:   description(prefix + "leader"),
			injectMember:  inject,
			recoverMember: recov,
		},
		idx: 0,
	}
	return []failure{one, all, majority, leader}
}
// makeInjectFailpoint returns an inject function that sets failpoint fp to
// val on the given member through its failpoint URL.
func makeInjectFailpoint(fp, val string) injectMemberFunc {
	return func(target *member) error {
		return putFailpoint(target.FailpointURL, fp, val)
	}
}
// makeRecoverFailpoint returns a recover function that removes failpoint fp
// from the given member. If the removal fails, the member is counted as
// crashed for fp and restarted through recoverStop.
func makeRecoverFailpoint(fp string) recoverMemberFunc {
	return func(target *member) error {
		if err := delFailpoint(target.FailpointURL, fp); err != nil {
			// node not responding, likely dead from fp panic; restart
			fpStats.mu.Lock()
			fpStats.crashes[fp]++
			fpStats.mu.Unlock()
			return recoverStop(target)
		}
		return nil
	}
}
// putFailpoint sets failpoint fp to val by sending an HTTP PUT with val as
// the body to ep+"/"+fp. It returns an error if the request cannot be built
// or sent, or if the response status is not 2xx.
func putFailpoint(ep, fp, val string) error {
	req, err := http.NewRequest(http.MethodPut, ep+"/"+fp, strings.NewReader(val))
	if err != nil {
		// a malformed URL yields a nil request, which Client.Do cannot handle
		return err
	}
	c := http.Client{}
	resp, err := c.Do(req)
	if err != nil {
		return err
	}
	resp.Body.Close()
	if resp.StatusCode/100 != 2 {
		return fmt.Errorf("failed to PUT %s=%s at %s (%v)", fp, val, ep, resp.Status)
	}
	return nil
}
// delFailpoint removes failpoint fp by sending an HTTP DELETE to ep+"/"+fp.
// It returns an error if the request cannot be built or sent, or if the
// response status is not 2xx.
func delFailpoint(ep, fp string) error {
	req, err := http.NewRequest(http.MethodDelete, ep+"/"+fp, strings.NewReader(""))
	if err != nil {
		// a malformed URL yields a nil request, which Client.Do cannot handle
		return err
	}
	c := http.Client{}
	resp, err := c.Do(req)
	if err != nil {
		return err
	}
	resp.Body.Close()
	if resp.StatusCode/100 != 2 {
		return fmt.Errorf("failed to DELETE %s at %s (%v)", fp, ep, resp.Status)
	}
	return nil
}

View file

@ -0,0 +1,202 @@
// Copyright 2015 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package main
import (
"fmt"
"math/rand"
"os/exec"
"time"
)
// failure is a fault that can be injected into the testing cluster and later
// recovered from.
type failure interface {
// Inject injects the failure into the testing cluster at the given
// round. When calling the function, the cluster should be healthy.
Inject(c *cluster, round int) error
// Recover recovers the injected failure caused by the injection of the
// given round and wait for the recovery of the testing cluster.
Recover(c *cluster, round int) error
// Desc returns a description of the failure
Desc() string
}
// description is a string that provides the Desc method of the failure
// interface to any type that embeds it.
type description string
// Desc returns the description text.
func (d description) Desc() string { return string(d) }
// injectMemberFunc injects a failure into a single member.
type injectMemberFunc func(*member) error
// recoverMemberFunc recovers a single member from an injected failure.
type recoverMemberFunc func(*member) error
// failureByFunc describes a failure by a description plus per-member inject
// and recover functions. It is the underlying type of failureOne, failureAll,
// failureMajority and failureNop, and is embedded in failureLeader.
type failureByFunc struct {
description
injectMember injectMemberFunc
recoverMember recoverMemberFunc
}
// failureOne applies the failure to the member selected by round % cluster size.
type failureOne failureByFunc
// failureAll applies the failure to every member.
type failureAll failureByFunc
// failureMajority applies the failure to the members chosen by killMap.
type failureMajority failureByFunc
// failureLeader applies the failure to the current leader.
type failureLeader struct {
failureByFunc
// idx is the member index of the leader found by Inject; Recover uses it.
idx int
}
// failureDelay wraps a failure and sleeps for delayDuration after a
// successful injection.
type failureDelay struct {
failure
delayDuration time.Duration
}
// failureUntilSnapshot injects a failure and waits for a snapshot event.
// The wait is implemented by its Inject method, which polls the cluster
// revision until it has advanced by more than snapshotCount.
type failureUntilSnapshot struct{ failure }
// Inject applies the failure to the member at index round % c.Size.
func (f *failureOne) Inject(c *cluster, round int) error {
	target := c.Members[round%c.Size]
	return f.injectMember(target)
}
// Recover recovers the member at index round % c.Size and then waits for the
// cluster to become healthy.
func (f *failureOne) Recover(c *cluster, round int) error {
	target := c.Members[round%c.Size]
	err := f.recoverMember(target)
	if err != nil {
		return err
	}
	return c.WaitHealth()
}
// Inject applies the failure to every member in order, stopping at the first
// error.
func (f *failureAll) Inject(c *cluster, round int) error {
	var err error
	for i := range c.Members {
		if err = f.injectMember(c.Members[i]); err != nil {
			break
		}
	}
	return err
}
// Recover recovers every member in order, stopping at the first error, and
// then waits for the cluster to become healthy.
func (f *failureAll) Recover(c *cluster, round int) error {
	for i := range c.Members {
		err := f.recoverMember(c.Members[i])
		if err != nil {
			return err
		}
	}
	return c.WaitHealth()
}
// Inject applies the failure to the members selected by killMap for this
// round, stopping at the first error.
func (f *failureMajority) Inject(c *cluster, round int) error {
	victims := killMap(c.Size, round)
	for idx := range victims {
		err := f.injectMember(c.Members[idx])
		if err != nil {
			return err
		}
	}
	return nil
}
// Recover recovers the members selected by killMap for this round (the same
// selection Inject makes for the same round), stopping at the first error.
// Unlike the other Recover methods it does not wait for cluster health.
func (f *failureMajority) Recover(c *cluster, round int) error {
	victims := killMap(c.Size, round)
	for idx := range victims {
		err := f.recoverMember(c.Members[idx])
		if err != nil {
			return err
		}
	}
	return nil
}
// Inject looks up the current leader, remembers its index for Recover, and
// applies the failure to it.
func (f *failureLeader) Inject(c *cluster, round int) error {
	leader, err := c.GetLeader()
	if err != nil {
		return err
	}
	f.idx = leader
	return f.injectMember(c.Members[leader])
}
// Recover recovers the member recorded by Inject and then waits for the
// cluster to become healthy.
func (f *failureLeader) Recover(c *cluster, round int) error {
	formerLeader := c.Members[f.idx]
	err := f.recoverMember(formerLeader)
	if err != nil {
		return err
	}
	return c.WaitHealth()
}
// Inject injects the wrapped failure and, if that succeeded, sleeps for
// delayDuration.
func (f *failureDelay) Inject(c *cluster, round int) error {
	err := f.failure.Inject(c, round)
	if err == nil {
		time.Sleep(f.delayDuration)
	}
	return err
}
// Inject injects the wrapped failure and then waits until the cluster's
// maximum revision has advanced by more than snapshotCount. Clusters with
// fewer than three members skip the wait. It returns an error if the starting
// revision cannot be determined or if the revision does not advance far
// enough within the retry budget.
func (f *failureUntilSnapshot) Inject(c *cluster, round int) error {
if err := f.failure.Inject(c, round); err != nil {
return err
}
if c.Size < 3 {
return nil
}
// maxRev may fail since failure just injected, retry if failed.
startRev, err := c.maxRev()
for i := 0; i < 10 && startRev == 0; i++ {
startRev, err = c.maxRev()
}
// still no revision after the retries: report the last maxRev error
if startRev == 0 {
return err
}
lastRev := startRev
// Normal healthy cluster could accept 1000req/s at least.
// Give it 3-times time to create a new snapshot.
retry := snapshotCount / 1000 * 3
for j := 0; j < retry; j++ {
// errors are ignored here; a failed poll simply does not advance lastRev
lastRev, _ = c.maxRev()
// If the number of proposals committed is bigger than snapshot count,
// a new snapshot should have been created.
if lastRev-startRev > snapshotCount {
return nil
}
time.Sleep(time.Second)
}
return fmt.Errorf("cluster too slow: only commit %d requests in %ds", lastRev-startRev, retry)
}
// Desc returns the wrapped failure's description followed by a note that the
// failure is held until a snapshot is expected.
func (f *failureUntilSnapshot) Desc() string {
	const suffix = " for a long time and expect it to recover from an incoming snapshot"
	return f.failure.Desc() + suffix
}
// killMap returns a set of size/2+1 distinct member indexes in [0, size),
// drawn from a random source seeded with seed, so the same (size, seed) pair
// always yields the same set.
func killMap(size int, seed int) map[int]bool {
	rng := rand.New(rand.NewSource(int64(seed)))
	chosen := make(map[int]bool)
	for need := size/2 + 1; ; {
		// duplicates collapse in the map, so keep drawing until enough
		// distinct indexes have been picked
		chosen[rng.Intn(size)] = true
		if len(chosen) >= need {
			break
		}
	}
	return chosen
}
// failureNop is a failure that does nothing on Inject and Recover.
type failureNop failureByFunc
// Inject does nothing and returns nil.
func (f *failureNop) Inject(c *cluster, round int) error { return nil }
// Recover does nothing and returns nil.
func (f *failureNop) Recover(c *cluster, round int) error { return nil }
// failureExternal is a failure whose injection and recovery are delegated to
// an external script.
type failureExternal struct {
failure
description string
// scriptPath is run with "enable <round>" on Inject and "disable <round>"
// on Recover.
scriptPath string
}
// Inject runs the script as `<scriptPath> enable <round>` and returns the
// result of running it.
func (f *failureExternal) Inject(c *cluster, round int) error {
	cmd := exec.Command(f.scriptPath, "enable", fmt.Sprintf("%d", round))
	return cmd.Run()
}
// Recover runs the script as `<scriptPath> disable <round>` and returns the
// result of running it.
func (f *failureExternal) Recover(c *cluster, round int) error {
	cmd := exec.Command(f.scriptPath, "disable", fmt.Sprintf("%d", round))
	return cmd.Run()
}
// Desc returns the description stored on the failure.
func (f *failureExternal) Desc() string { return f.description }

View file

@ -0,0 +1,154 @@
// Copyright 2016 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package main
import (
"fmt"
"time"
)
const (
// snapshotCount is the revision advance failureUntilSnapshot.Inject waits for.
// NOTE(review): presumably matches the snapshot count the members run with — confirm.
snapshotCount = 10000
// slowNetworkLatency and randomVariation are the two arguments injectLatency
// passes to Agent.SetLatency.
slowNetworkLatency = 500 // 500 millisecond
randomVariation = 50 // NOTE(review): presumably milliseconds as well — confirm
// Wait more when it recovers from slow network, because network layer
// needs extra time to propagate traffic control (tc command) change.
// Otherwise, we get different hash values from the previous revision.
// For more detail, please see https://github.com/coreos/etcd/issues/5121.
waitRecover = 5 * time.Second
)
// injectStop stops the member through its agent.
func injectStop(m *member) error { return m.Agent.Stop() }
// recoverStop restarts the member through its agent; only the error of the
// restart is returned.
func recoverStop(m *member) error {
_, err := m.Agent.Restart()
return err
}
// newFailureKillAll returns the failure that stops every member and restarts
// them on recovery.
func newFailureKillAll() failure {
	f := new(failureAll)
	f.description = "kill all members"
	f.injectMember = injectStop
	f.recoverMember = recoverStop
	return f
}
// newFailureKillMajority returns the failure that stops a majority of the
// members and restarts them on recovery.
func newFailureKillMajority() failure {
	f := new(failureMajority)
	f.description = "kill majority of the cluster"
	f.injectMember = injectStop
	f.recoverMember = recoverStop
	return f
}
// newFailureKillOne returns the failure that stops a single member and
// restarts it on recovery.
func newFailureKillOne() failure {
	f := new(failureOne)
	f.description = "kill one random member"
	f.injectMember = injectStop
	f.recoverMember = recoverStop
	return f
}
// newFailureKillLeader returns the failure that stops the current leader and
// restarts it on recovery.
func newFailureKillLeader() failure {
	return &failureLeader{
		failureByFunc: failureByFunc{
			description:   "kill leader member",
			injectMember:  injectStop,
			recoverMember: recoverStop,
		},
		idx: 0,
	}
}
// newFailureKillOneForLongTime returns the kill-one failure wrapped so that
// injection waits until a snapshot is expected.
func newFailureKillOneForLongTime() failure {
	return &failureUntilSnapshot{failure: newFailureKillOne()}
}
// newFailureKillLeaderForLongTime returns the kill-leader failure wrapped so
// that injection waits until a snapshot is expected.
func newFailureKillLeaderForLongTime() failure {
	return &failureUntilSnapshot{failure: newFailureKillLeader()}
}
// injectDropPort asks the agent to drop traffic on the member's peer port.
func injectDropPort(m *member) error { return m.Agent.DropPort(m.peerPort()) }
// recoverDropPort asks the agent to recover the member's peer port.
func recoverDropPort(m *member) error { return m.Agent.RecoverPort(m.peerPort()) }
// newFailureIsolate returns the failure that drops the peer port of a single
// member and recovers the port afterwards.
func newFailureIsolate() failure {
	f := new(failureOne)
	f.description = "isolate one member"
	f.injectMember = injectDropPort
	f.recoverMember = recoverDropPort
	return f
}
// newFailureIsolateAll returns the failure that drops the peer port of every
// member and recovers the ports afterwards.
func newFailureIsolateAll() failure {
	f := new(failureAll)
	f.description = "isolate all members"
	f.injectMember = injectDropPort
	f.recoverMember = recoverDropPort
	return f
}
// injectLatency asks the agent to add slowNetworkLatency (with
// randomVariation) to the member's network. If that fails, the agent is asked
// to remove the latency again (result ignored) and the original error is
// returned.
func injectLatency(m *member) error {
	err := m.Agent.SetLatency(slowNetworkLatency, randomVariation)
	if err == nil {
		return nil
	}
	m.Agent.RemoveLatency()
	return err
}
// recoverLatency asks the agent to remove the injected latency and, on
// success, sleeps for waitRecover before returning.
func recoverLatency(m *member) error {
	err := m.Agent.RemoveLatency()
	if err == nil {
		time.Sleep(waitRecover)
	}
	return err
}
// newFailureSlowNetworkOneMember returns the failure that adds network
// latency to a single member.
func newFailureSlowNetworkOneMember() failure {
	f := new(failureOne)
	f.description = description(fmt.Sprintf("slow down one member's network by adding %d ms latency", slowNetworkLatency))
	f.injectMember = injectLatency
	f.recoverMember = recoverLatency
	return f
}
// newFailureSlowNetworkLeader returns the failure that adds network latency
// to the current leader.
func newFailureSlowNetworkLeader() failure {
	text := fmt.Sprintf("slow down leader's network by adding %d ms latency", slowNetworkLatency)
	return &failureLeader{
		failureByFunc: failureByFunc{
			description:   description(text),
			injectMember:  injectLatency,
			recoverMember: recoverLatency,
		},
		idx: 0,
	}
}
// newFailureSlowNetworkAll returns the failure that adds network latency to
// every member.
func newFailureSlowNetworkAll() failure {
	f := new(failureAll)
	f.description = "slow down all members' network"
	f.injectMember = injectLatency
	f.recoverMember = recoverLatency
	return f
}
// newFailureNop returns the failure that injects nothing.
func newFailureNop() failure {
	f := new(failureNop)
	f.description = "no failure"
	return f
}
// newFailureExternal returns the failure driven by the external script at
// scriptPath.
func newFailureExternal(scriptPath string) failure {
	text := fmt.Sprintf("external fault injector (script: %s)", scriptPath)
	return &failureExternal{
		description: text,
		scriptPath:  scriptPath,
	}
}

View file

@ -0,0 +1,44 @@
// Copyright 2015 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package main
import (
"encoding/json"
"net/http"
)
// statusHandler is an http.Handler that serves the tester status as JSON.
type statusHandler struct {
status *Status
}
// ServeHTTP writes a JSON snapshot of the tester status. The status mutex is
// held while the snapshot is built (including the cluster status query) and
// encoded. An encoding failure is reported with a 500 response.
func (sh statusHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
	w.Header().Set("Content-Type", "application/json")
	en := json.NewEncoder(w)

	st := sh.status
	st.mu.Lock()
	defer st.mu.Unlock()

	err := en.Encode(Status{
		Since:      st.Since,
		Failures:   st.Failures,
		RoundLimit: st.RoundLimit,
		Cluster:    st.cluster.Status(),
		cluster:    st.cluster,
		Round:      st.Round,
		Case:       st.Case,
	})
	if err != nil {
		http.Error(w, err.Error(), http.StatusInternalServerError)
	}
}

View file

@ -0,0 +1,243 @@
// Copyright 2016 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package main
import (
"fmt"
"math/rand"
"sync"
"sync/atomic"
"time"
"golang.org/x/net/context" // grpc does a comparison on context.Cancel; can't use "context" package
"golang.org/x/time/rate"
"google.golang.org/grpc"
"google.golang.org/grpc/transport"
"github.com/coreos/etcd/etcdserver"
"github.com/coreos/etcd/etcdserver/api/v3rpc/rpctypes"
pb "github.com/coreos/etcd/etcdserver/etcdserverpb"
)
// keyStresser generates key-value load (puts, ranges and deletes) against a
// single endpoint.
type keyStresser struct {
Endpoint string
// keyLargeSize and keySize are the value sizes used by the two put entries
// of the stress table.
keyLargeSize int
keySize int
// keySuffixRange bounds the random number used as key suffix.
keySuffixRange int
// N is the number of goroutines started by Stress.
N int
// rateLimiter is waited on before every request.
rateLimiter *rate.Limiter
// wg tracks the goroutines started by Stress; Cancel waits on it.
wg sync.WaitGroup
// cancel and conn are set by Stress and released by Cancel.
cancel func()
conn *grpc.ClientConn
// atomicModifiedKeys records the number of keys created and deleted by the stresser.
atomicModifiedKeys int64
stressTable *stressTable
}
// Stress dials the endpoint, builds the weighted table of stress operations
// and starts N goroutines that run them until Cancel is called. It returns an
// error only if dialing fails.
func (s *keyStresser) Stress() error {
// TODO: add backoff option
conn, err := grpc.Dial(s.Endpoint, grpc.WithInsecure())
if err != nil {
return fmt.Errorf("%v (%s)", err, s.Endpoint)
}
ctx, cancel := context.WithCancel(context.Background())
// register all workers before any of them is started
s.wg.Add(s.N)
s.conn = conn
s.cancel = cancel
kvc := pb.NewKVClient(conn)
var stressEntries = []stressEntry{
{weight: 0.7, f: newStressPut(kvc, s.keySuffixRange, s.keySize)},
{
// large puts are weighted down by the ratio of the two value sizes
weight: 0.7 * float32(s.keySize) / float32(s.keyLargeSize),
f: newStressPut(kvc, s.keySuffixRange, s.keyLargeSize),
},
{weight: 0.07, f: newStressRange(kvc, s.keySuffixRange)},
{weight: 0.07, f: newStressRangeInterval(kvc, s.keySuffixRange)},
{weight: 0.07, f: newStressDelete(kvc, s.keySuffixRange)},
{weight: 0.07, f: newStressDeleteInterval(kvc, s.keySuffixRange)},
}
s.stressTable = createStressTable(stressEntries)
for i := 0; i < s.N; i++ {
go s.run(ctx)
}
plog.Infof("keyStresser %q is started", s.Endpoint)
return nil
}
// run is the body of one stress goroutine. It repeatedly waits on the rate
// limiter, picks a random operation from the stress table and runs it with a
// 10-second timeout. Successful operations add to the modified-key counter.
// Errors listed in the switch with an empty case are tolerated and the loop
// continues; cancellation, a closing client connection, or any unlisted error
// ends the goroutine.
func (s *keyStresser) run(ctx context.Context) {
defer s.wg.Done()
for {
if err := s.rateLimiter.Wait(ctx); err == context.Canceled {
return
}
// TODO: 10-second is enough timeout to cover leader failure
// and immediate leader election. Find out what other cases this
// could be timed out.
sctx, scancel := context.WithTimeout(ctx, 10*time.Second)
err, modifiedKeys := s.stressTable.choose()(sctx)
scancel()
if err == nil {
atomic.AddInt64(&s.atomicModifiedKeys, modifiedKeys)
continue
}
// errors are matched on their description text
switch grpc.ErrorDesc(err) {
case context.DeadlineExceeded.Error():
// This retries when request is triggered at the same time as
// leader failure. When we terminate the leader, the request to
// that leader cannot be processed, and times out. Also requests
// to followers cannot be forwarded to the old leader, so timing out
// as well. We want to keep stressing until the cluster elects a
// new leader and start processing requests again.
case etcdserver.ErrTimeoutDueToLeaderFail.Error(), etcdserver.ErrTimeout.Error():
// This retries when request is triggered at the same time as
// leader failure and follower nodes receive time out errors
// from losing their leader. Followers should retry to connect
// to the new leader.
case etcdserver.ErrStopped.Error():
// one of the etcd nodes stopped from failure injection
case transport.ErrConnClosing.Desc:
// server closed the transport (failure injected node)
case rpctypes.ErrNotCapable.Error():
// capability check has not been done (in the beginning)
case rpctypes.ErrTooManyRequests.Error():
// hitting the recovering member.
case context.Canceled.Error():
// from stresser.Cancel method:
return
case grpc.ErrClientConnClosing.Error():
// from stresser.Cancel method:
return
default:
plog.Errorf("keyStresser %v exited with error (%v)", s.Endpoint, err)
return
}
}
}
// Cancel stops the stresser: it cancels the workers' context, closes the
// connection and waits for every worker goroutine to exit.
func (s *keyStresser) Cancel() {
s.cancel()
s.conn.Close()
s.wg.Wait()
plog.Infof("keyStresser %q is canceled", s.Endpoint)
}
// ModifiedKeys returns the current value of the modified-key counter, read
// atomically.
func (s *keyStresser) ModifiedKeys() int64 {
return atomic.LoadInt64(&s.atomicModifiedKeys)
}
// Checker returns nil: the key stresser has no checker of its own.
func (s *keyStresser) Checker() Checker { return nil }
// stressFunc performs one stress operation and returns its error together
// with the number of keys it modified.
type stressFunc func(ctx context.Context) (err error, modifiedKeys int64)
// stressEntry is one operation of a stressTable and its relative weight.
type stressEntry struct {
weight float32
f stressFunc
}
// stressTable is a weighted set of stress operations to choose from.
type stressTable struct {
entries []stressEntry
// sumWeights is the sum of all entry weights, computed by createStressTable.
sumWeights float32
}
// createStressTable returns a table over entries with the sum of their
// weights precomputed.
func createStressTable(entries []stressEntry) *stressTable {
	table := &stressTable{entries: entries}
	for i := range entries {
		table.sumWeights += entries[i].weight
	}
	return table
}
// choose picks an operation at random with probability proportional to its
// weight: it draws a value in [0, sumWeights) and returns the first entry at
// which the running weight total reaches that value. If no entry reaches it,
// the first entry is returned.
func (st *stressTable) choose() stressFunc {
	threshold := rand.Float32() * st.sumWeights

	var running float32
	for i := range st.entries {
		running += st.entries[i].weight
		if running >= threshold {
			return st.entries[i].f
		}
	}
	return st.entries[0].f
}
// newStressPut returns an operation that puts a value of keySize random bytes
// under a random "foo"-prefixed key and reports one modified key.
func newStressPut(kvc pb.KVClient, keySuffixRange, keySize int) stressFunc {
	return func(ctx context.Context) (error, int64) {
		req := &pb.PutRequest{
			Key:   []byte(fmt.Sprintf("foo%016x", rand.Intn(keySuffixRange))),
			Value: randBytes(keySize),
		}
		_, err := kvc.Put(ctx, req, grpc.FailFast(false))
		return err, 1
	}
}
// newStressRange returns a stressFunc that issues a Range request for a single
// random key of the form "foo" + 16 hex digits. It reports zero modified keys.
func newStressRange(kvc pb.KVClient, keySuffixRange int) stressFunc {
	return func(ctx context.Context) (error, int64) {
		key := fmt.Sprintf("foo%016x", rand.Intn(keySuffixRange))
		req := &pb.RangeRequest{Key: []byte(key)}
		_, err := kvc.Range(ctx, req, grpc.FailFast(false))
		return err, 0
	}
}
// newStressRangeInterval returns a stressFunc that issues a Range request over
// the interval from a random start key to the key 500 suffixes later. It
// reports zero modified keys.
func newStressRangeInterval(kvc pb.KVClient, keySuffixRange int) stressFunc {
	return func(ctx context.Context) (error, int64) {
		first := rand.Intn(keySuffixRange)
		last := first + 500
		req := &pb.RangeRequest{
			Key:      []byte(fmt.Sprintf("foo%016x", first)),
			RangeEnd: []byte(fmt.Sprintf("foo%016x", last)),
		}
		_, err := kvc.Range(ctx, req, grpc.FailFast(false))
		return err, 0
	}
}
// newStressDelete returns a stressFunc that issues a DeleteRange request for a
// single random key of the form "foo" + 16 hex digits. Each call reports one
// modified key, whether or not the request succeeded.
func newStressDelete(kvc pb.KVClient, keySuffixRange int) stressFunc {
	return func(ctx context.Context) (error, int64) {
		key := fmt.Sprintf("foo%016x", rand.Intn(keySuffixRange))
		req := &pb.DeleteRangeRequest{Key: []byte(key)}
		_, err := kvc.DeleteRange(ctx, req, grpc.FailFast(false))
		return err, 1
	}
}
// newStressDeleteInterval returns a stressFunc that deletes the interval from a
// random start key to the key 500 suffixes later. On success it reports the
// Deleted count from the response; on failure it reports zero.
func newStressDeleteInterval(kvc pb.KVClient, keySuffixRange int) stressFunc {
	return func(ctx context.Context) (error, int64) {
		first := rand.Intn(keySuffixRange)
		last := first + 500
		req := &pb.DeleteRangeRequest{
			Key:      []byte(fmt.Sprintf("foo%016x", first)),
			RangeEnd: []byte(fmt.Sprintf("foo%016x", last)),
		}
		resp, err := kvc.DeleteRange(ctx, req, grpc.FailFast(false))
		if err != nil {
			return err, 0
		}
		return nil, resp.Deleted
	}
}

View file

@ -0,0 +1,377 @@
// Copyright 2016 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package main
import (
"fmt"
"math/rand"
"sync"
"sync/atomic"
"time"
"github.com/coreos/etcd/etcdserver/api/v3rpc/rpctypes"
pb "github.com/coreos/etcd/etcdserver/etcdserverpb"
"golang.org/x/net/context"
"golang.org/x/time/rate"
"google.golang.org/grpc"
)
const (
	// TTL is the time to live, in seconds, requested for the leases that the
	// stresser keeps alive.
	TTL = 120
	// TTLShort is the time to live, in seconds, requested for short-lived
	// leases.
	TTLShort = 2
)
// leaseStresser stresses one endpoint by repeatedly creating leases with keys
// attached, keeping the long-lived ones alive, and randomly revoking some.
type leaseStresser struct {
	endpoint string
	// cancel cancels ctx; both are set by Stress.
	cancel func()
	conn   *grpc.ClientConn
	kvc    pb.KVClient
	lc     pb.LeaseClient
	ctx    context.Context

	rateLimiter *rate.Limiter
	// atomicModifiedKey records the number of keys created and deleted during a test case
	atomicModifiedKey int64
	numLeases         int
	keysPerLease      int

	// aliveLeases survives across Stress calls (see setupOnce); the other two
	// maps are recreated by each Stress call.
	aliveLeases      *atomicLeases
	revokedLeases    *atomicLeases
	shortLivedLeases *atomicLeases

	// runWg tracks the run goroutine; aliveWg tracks keepLeaseAlive goroutines.
	runWg   sync.WaitGroup
	aliveWg sync.WaitGroup
}
// atomicLeases is a lock-protected map from lease ID to a timestamp.
type atomicLeases struct {
	// rwLock is used to protect read/write access of leases map
	// which are accessed and modified by different go routines.
	rwLock sync.RWMutex
	// leases maps a lease ID to the time recorded for it by add or update.
	leases map[int64]time.Time
}
// add records leaseID with timestamp t, overwriting any existing entry.
func (al *atomicLeases) add(leaseID int64, t time.Time) {
	al.rwLock.Lock()
	defer al.rwLock.Unlock()
	al.leases[leaseID] = t
}
// update sets the timestamp of leaseID to t, but only if leaseID is already
// present; unknown leases are left absent.
func (al *atomicLeases) update(leaseID int64, t time.Time) {
	al.rwLock.Lock()
	defer al.rwLock.Unlock()
	if _, present := al.leases[leaseID]; !present {
		return
	}
	al.leases[leaseID] = t
}
// read returns the timestamp stored for leaseID and whether the lease is
// present.
func (al *atomicLeases) read(leaseID int64) (rv time.Time, ok bool) {
	al.rwLock.RLock()
	defer al.rwLock.RUnlock()
	rv, ok = al.leases[leaseID]
	return rv, ok
}
// remove deletes leaseID from the map; removing an absent lease is a no-op.
func (al *atomicLeases) remove(leaseID int64) {
	al.rwLock.Lock()
	defer al.rwLock.Unlock()
	delete(al.leases, leaseID)
}
// getLeasesMap returns a copy of the lease map taken under the read lock, so
// callers can iterate it without holding the lock.
func (al *atomicLeases) getLeasesMap() map[int64]time.Time {
	al.rwLock.RLock()
	defer al.rwLock.RUnlock()
	snapshot := make(map[int64]time.Time, len(al.leases))
	for id, ts := range al.leases {
		snapshot[id] = ts
	}
	return snapshot
}
// setupOnce initializes the alive-lease map the first time it is called and is
// a no-op afterwards. It panics if numLeases or keysPerLease has not been set.
// The returned error is always nil.
func (ls *leaseStresser) setupOnce() error {
	if ls.aliveLeases != nil {
		return nil
	}
	switch {
	case ls.numLeases == 0:
		panic("expect numLeases to be set")
	case ls.keysPerLease == 0:
		panic("expect keysPerLease to be set")
	}
	ls.aliveLeases = &atomicLeases{leases: make(map[int64]time.Time)}
	return nil
}
// Stress dials the endpoint, creates the KV and lease clients, resets the
// revoked and short-lived lease maps, installs a fresh cancellable context,
// and starts the run loop in a goroutine. It returns an error only if setup or
// dialing fails.
func (ls *leaseStresser) Stress() error {
	plog.Infof("lease Stresser %v starting ...", ls.endpoint)
	if setupErr := ls.setupOnce(); setupErr != nil {
		return setupErr
	}

	conn, dialErr := grpc.Dial(
		ls.endpoint,
		grpc.WithInsecure(),
		grpc.WithBackoffMaxDelay(1*time.Second),
	)
	if dialErr != nil {
		return fmt.Errorf("%v (%s)", dialErr, ls.endpoint)
	}
	ls.conn = conn
	ls.kvc, ls.lc = pb.NewKVClient(conn), pb.NewLeaseClient(conn)

	ls.revokedLeases = &atomicLeases{leases: make(map[int64]time.Time)}
	ls.shortLivedLeases = &atomicLeases{leases: make(map[int64]time.Time)}

	ls.ctx, ls.cancel = context.WithCancel(context.Background())

	ls.runWg.Add(1)
	go ls.run()
	return nil
}
// run is the stresser's main loop. It restarts keepalives for leases that are
// already alive, then repeatedly waits on the rate limiter, creates leases,
// and randomly drops some, until the limiter reports that the context was
// canceled.
func (ls *leaseStresser) run() {
	defer ls.runWg.Done()
	ls.restartKeepAlives()

	// Every created key is eventually deleted too, so one round costs roughly
	// twice numLeases*keysPerLease key operations; one limiter token stands
	// for one such operation.
	tokensPerRound := 2 * ls.numLeases * ls.keysPerLease
	for {
		if waitErr := ls.rateLimiter.WaitN(ls.ctx, tokensPerRound); waitErr == context.Canceled {
			return
		}

		plog.Debugf("creating lease on %v", ls.endpoint)
		ls.createLeases()
		plog.Debugf("done creating lease on %v", ls.endpoint)

		plog.Debugf("dropping lease on %v", ls.endpoint)
		ls.randomlyDropLeases()
		plog.Debugf("done dropping lease on %v", ls.endpoint)
	}
}
// restartKeepAlives starts one keepLeaseAlive goroutine, tracked by aliveWg,
// for every lease currently recorded as alive.
func (ls *leaseStresser) restartKeepAlives() {
	alive := ls.aliveLeases.getLeasesMap()
	for id := range alive {
		ls.aliveWg.Add(1)
		// Arguments of a go statement are evaluated immediately, so each
		// goroutine receives its own lease ID.
		go ls.keepLeaseAlive(id)
	}
}
// createLeases tops up the long-lived leases first and then the short-lived
// ones.
func (ls *leaseStresser) createLeases() {
	ls.createAliveLeases()
	ls.createShortLivedLeases()
}
// createAliveLeases concurrently creates as many TTL leases (with keys) as are
// missing to reach numLeases alive leases, records each success, and starts a
// keepalive goroutine for it. It returns once every creation attempt is done.
func (ls *leaseStresser) createAliveLeases() {
	missing := ls.numLeases - len(ls.aliveLeases.getLeasesMap())

	var pending sync.WaitGroup
	createOne := func() {
		defer pending.Done()
		id, err := ls.createLeaseWithKeys(TTL)
		if err != nil {
			plog.Debugf("lease creation error: (%v)", err)
			return
		}
		ls.aliveLeases.add(id, time.Now())
		// aliveWg tracks every keepalive goroutine so Cancel can wait for them.
		ls.aliveWg.Add(1)
		go ls.keepLeaseAlive(id)
	}

	for n := 0; n < missing; n++ {
		pending.Add(1)
		go createOne()
	}
	pending.Wait()
}
// createShortLivedLeases concurrently creates TTLShort leases (with keys) until
// numLeases of them are recorded. A round may fall short because of failures;
// the shortfall is computed again, and made up, in a later round.
func (ls *leaseStresser) createShortLivedLeases() {
	missing := ls.numLeases - len(ls.shortLivedLeases.getLeasesMap())

	var pending sync.WaitGroup
	createOne := func() {
		defer pending.Done()
		id, err := ls.createLeaseWithKeys(TTLShort)
		if err != nil {
			return
		}
		ls.shortLivedLeases.add(id, time.Now())
	}

	for n := 0; n < missing; n++ {
		pending.Add(1)
		go createOne()
	}
	pending.Wait()
}
// createLeaseWithKeys grants a lease with the given ttl and attaches the
// stresser's keys to it. It returns the lease ID, or -1 and the error if
// either step fails.
func (ls *leaseStresser) createLeaseWithKeys(ttl int64) (int64, error) {
	id, grantErr := ls.createLease(ttl)
	if grantErr != nil {
		plog.Debugf("lease creation error: (%v)", grantErr)
		return -1, grantErr
	}
	plog.Debugf("lease %v created ", id)

	attachErr := ls.attachKeysWithLease(id)
	if attachErr != nil {
		return -1, attachErr
	}
	return id, nil
}
// randomlyDropLeases visits every alive lease concurrently and gives each a
// chance of being revoked. Revoked leases move from aliveLeases to
// revokedLeases. It returns once every lease has been processed.
func (ls *leaseStresser) randomlyDropLeases() {
	var pending sync.WaitGroup
	dropOne := func(id int64) {
		defer pending.Done()
		dropped, err := ls.randomlyDropLease(id)
		switch {
		case err != nil:
			// On an error such as a canceled context we cannot tell whether
			// the lease was revoked, so stop treating it as alive.
			plog.Debugf("drop lease %v has failed error (%v)", id, err)
			ls.aliveLeases.remove(id)
		case dropped:
			plog.Debugf("lease %v dropped", id)
			ls.revokedLeases.add(id, time.Now())
			ls.aliveLeases.remove(id)
		}
	}

	for id := range ls.aliveLeases.getLeasesMap() {
		pending.Add(1)
		go dropOne(id)
	}
	pending.Wait()
}
// createLease requests a lease with the given ttl and returns its ID, or -1
// and the error if the grant fails.
func (ls *leaseStresser) createLease(ttl int64) (int64, error) {
	req := &pb.LeaseGrantRequest{TTL: ttl}
	resp, err := ls.lc.LeaseGrant(ls.ctx, req)
	if err == nil {
		return resp.ID, nil
	}
	return -1, err
}
// keepLeaseAlive sends a keepalive for leaseID roughly every 500ms over a
// LeaseKeepAlive stream until the stresser's context is canceled or the server
// reports that the lease has expired. It marks ls.aliveWg done when it returns.
func (ls *leaseStresser) keepLeaseAlive(leaseID int64) {
	defer ls.aliveWg.Done()
	ctx, cancel := context.WithCancel(ls.ctx)
	stream, err := ls.lc.LeaseKeepAlive(ctx)
	// cancel is reassigned whenever the stream is recreated below; wrapping
	// the call in a closure makes the defer run the latest cancel.
	defer func() { cancel() }()
	for {
		select {
		case <-time.After(500 * time.Millisecond):
		case <-ls.ctx.Done():
			plog.Debugf("keepLeaseAlive lease %v context canceled ", leaseID)
			// A lease may still be alive when keepLeaseAlive() exits and yet
			// expire later, during the invariant checking phase.
			// To avoid that, before leaving the keepalive loop we check whether
			// the lease has been renewed within the last TTL/2.
			// If it has, the invariant checker has at least TTL/2 before the
			// lease expires, which is long enough for the check to finish.
			// If it has not, we remove the lease from the alive map so that it
			// cannot expire in the middle of invariant checking.
			renewTime, ok := ls.aliveLeases.read(leaseID)
			if ok && renewTime.Add(TTL/2*time.Second).Before(time.Now()) {
				ls.aliveLeases.remove(leaseID)
				plog.Debugf("keepLeaseAlive lease %v has not been renewed. drop it.", leaseID)
			}
			return
		}

		// err holds the result of the last stream creation or Send; on failure
		// the old stream's context is canceled and a new stream is created.
		if err != nil {
			plog.Debugf("keepLeaseAlive lease %v creates stream error: (%v)", leaseID, err)
			cancel()
			ctx, cancel = context.WithCancel(ls.ctx)
			stream, err = ls.lc.LeaseKeepAlive(ctx)
			continue
		}
		err = stream.Send(&pb.LeaseKeepAliveRequest{ID: leaseID})
		plog.Debugf("keepLeaseAlive stream sends lease %v keepalive request", leaseID)
		if err != nil {
			plog.Debugf("keepLeaseAlive stream sends lease %v error (%v)", leaseID, err)
			continue
		}
		leaseRenewTime := time.Now()
		plog.Debugf("keepLeaseAlive stream sends lease %v keepalive request succeed", leaseID)
		// NOTE(review): ":=" declares a new err scoped to this iteration, so a
		// Recv failure does not by itself trigger the stream recreation above;
		// that happens only once a later Send fails. Confirm this is intended.
		respRC, err := stream.Recv()
		if err != nil {
			plog.Debugf("keepLeaseAlive stream receives lease %v stream error (%v)", leaseID, err)
			continue
		}
		// The lease has expired once its TTL reaches 0;
		// stop sending keepalives for an expired lease.
		if respRC.TTL <= 0 {
			plog.Debugf("keepLeaseAlive stream receives lease %v has TTL <= 0", leaseID)
			ls.aliveLeases.remove(leaseID)
			return
		}
		// renew lease timestamp only if lease is present
		plog.Debugf("keepLeaseAlive renew lease %v", leaseID)
		ls.aliveLeases.update(leaseID, leaseRenewTime)
	}
}
// attachKeysWithLease attaches ls.keysPerLease keys to the given lease in a
// single transaction. Each key is the lease ID, an underscore, and the key's
// creation index, e.g. 5186835655248304152_0 for the first key and
// 5186835655248304152_1 for the second; every value is "bar".
//
// The transaction is retried until it succeeds, the lease is reported as not
// found, or the stresser's context is canceled. It returns nil on success,
// the lease-not-found error, or the context's error.
func (ls *leaseStresser) attachKeysWithLease(leaseID int64) error {
	txnPuts := make([]*pb.RequestOp, 0, ls.keysPerLease)
	for j := 0; j < ls.keysPerLease; j++ {
		put := &pb.PutRequest{
			Key:   []byte(fmt.Sprintf("%d_%d", leaseID, j)),
			Value: []byte("bar"),
			Lease: leaseID,
		}
		txnPuts = append(txnPuts, &pb.RequestOp{Request: &pb.RequestOp_RequestPut{RequestPut: put}})
	}
	// keep retrying until lease is not found or ctx is being canceled
	for ls.ctx.Err() == nil {
		txn := &pb.TxnRequest{Success: txnPuts}
		_, err := ls.kvc.Txn(ls.ctx, txn)
		if err == nil {
			// All created keys are deleted later too, so the number of key
			// operations is roughly twice the number of created keys.
			atomic.AddInt64(&ls.atomicModifiedKey, 2*int64(ls.keysPerLease))
			return nil
		}
		if rpctypes.Error(err) == rpctypes.ErrLeaseNotFound {
			return err
		}
	}
	return ls.ctx.Err()
}
// randomlyDropLease revokes the lease with a 50% chance: it proceeds only when
// rand.Intn(2) returns 0. When it does proceed, the revoke is retried until it
// succeeds, the lease is reported as not found (both count as dropped), or the
// stresser's context is canceled, in which case the context error is returned.
func (ls *leaseStresser) randomlyDropLease(leaseID int64) (bool, error) {
	if coin := rand.Intn(2); coin != 0 {
		return false, nil
	}
	req := &pb.LeaseRevokeRequest{ID: leaseID}
	for ls.ctx.Err() == nil {
		_, err := ls.lc.LeaseRevoke(ls.ctx, req)
		if err == nil {
			return true, nil
		}
		if rpctypes.Error(err) == rpctypes.ErrLeaseNotFound {
			return true, nil
		}
	}
	plog.Debugf("randomlyDropLease error: (%v)", ls.ctx.Err())
	return false, ls.ctx.Err()
}
// Cancel stops the lease stresser: it cancels the stresser's context, waits
// for the run loop and all keepalive goroutines to finish, and then closes the
// connection.
func (ls *leaseStresser) Cancel() {
	plog.Debugf("lease stresser %q is canceling...", ls.endpoint)
	ls.cancel()
	// Wait for run first: it is the goroutine whose lease creation adds to
	// aliveWg, so no new keepalive goroutines can appear after it has exited.
	ls.runWg.Wait()
	ls.aliveWg.Wait()
	ls.conn.Close()
	plog.Infof("lease stresser %q is canceled", ls.endpoint)
}
// ModifiedKeys returns the current value of the stresser's modified-key
// counter. The counter is read atomically.
func (ls *leaseStresser) ModifiedKeys() int64 {
	counter := &ls.atomicModifiedKey
	return atomic.LoadInt64(counter)
}
// Checker returns a leaseChecker bound to this stresser and its endpoint.
func (ls *leaseStresser) Checker() Checker {
	return &leaseChecker{
		endpoint: ls.endpoint,
		ls:       ls,
	}
}

View file

@ -0,0 +1,215 @@
// Copyright 2015 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package main
import (
"flag"
"fmt"
"net/http"
"net/http/pprof"
"os"
"strings"
"github.com/coreos/pkg/capnslog"
"github.com/prometheus/client_golang/prometheus"
"golang.org/x/time/rate"
)
// plog is the package-level logger used throughout the tester.
var plog = capnslog.NewPackageLogger("github.com/coreos/etcd", "etcd-tester")

// Default ports used for every agent when the matching -client-ports,
// -peer-ports or -failpoint-ports flag is left empty.
const (
	defaultClientPort    = 2379
	defaultPeerPort      = 2380
	defaultFailpointPort = 2381
)

// pprofPrefix is the URL path prefix under which the pprof handlers are
// registered when -enable-pprof is set.
const pprofPrefix = "/debug/pprof-tester"
// main parses the command-line flags, bootstraps the cluster described by the
// agent endpoints, builds the failure schedule and stress configuration,
// registers the status, metrics and (optionally) pprof HTTP handlers served on
// :9028, and then runs the tester loop.
func main() {
	endpointStr := flag.String("agent-endpoints", "localhost:9027", "HTTP RPC endpoints of agents. Do not specify the schema.")
	clientPorts := flag.String("client-ports", "", "etcd client port for each agent endpoint")
	peerPorts := flag.String("peer-ports", "", "etcd peer port for each agent endpoint")
	failpointPorts := flag.String("failpoint-ports", "", "etcd failpoint port for each agent endpoint")

	datadir := flag.String("data-dir", "agent.etcd", "etcd data directory location on agent machine.")
	stressKeyLargeSize := flag.Uint("stress-key-large-size", 32*1024+1, "the size of each large key written into etcd.")
	stressKeySize := flag.Uint("stress-key-size", 100, "the size of each small key written into etcd.")
	stressKeySuffixRange := flag.Uint("stress-key-count", 250000, "the count of key range written into etcd.")
	limit := flag.Int("limit", -1, "the limit of rounds to run failure set (-1 to run without limits).")
	stressQPS := flag.Int("stress-qps", 10000, "maximum number of stresser requests per second.")
	schedCases := flag.String("schedule-cases", "", "test case schedule")
	consistencyCheck := flag.Bool("consistency-check", true, "true to check consistency (revision, hash)")
	stresserType := flag.String("stresser", "keys,lease", "comma separated list of stressers (keys, lease, v2keys, nop).")
	failureTypes := flag.String("failures", "default,failpoints", "specify failures (concat of \"default\" and \"failpoints\").")
	externalFailures := flag.String("external-failures", "", "specify a path of script for enabling/disabling an external fault injector")
	enablePprof := flag.Bool("enable-pprof", false, "true to enable pprof")
	flag.Parse()

	// One agent per endpoint; each port list must match the endpoint count or
	// be empty, in which case the default port is used for every agent.
	eps := strings.Split(*endpointStr, ",")
	cports := portsFromArg(*clientPorts, len(eps), defaultClientPort)
	pports := portsFromArg(*peerPorts, len(eps), defaultPeerPort)
	fports := portsFromArg(*failpointPorts, len(eps), defaultFailpointPort)
	agents := make([]agentConfig, len(eps))

	for i := range eps {
		agents[i].endpoint = eps[i]
		agents[i].clientPort = cports[i]
		agents[i].peerPort = pports[i]
		agents[i].failpointPort = fports[i]
		agents[i].datadir = *datadir
	}

	c := &cluster{agents: agents}
	if err := c.bootstrap(); err != nil {
		plog.Fatal(err)
	}
	defer c.Terminate()

	// ensure cluster is fully booted to know failpoints are available
	c.WaitHealth()

	var failures []failure

	if failureTypes != nil && *failureTypes != "" {
		failures = makeFailures(*failureTypes, c)
	}

	if externalFailures != nil && *externalFailures != "" {
		if len(failures) != 0 {
			plog.Errorf("specify only one of -failures or -external-failures")
			os.Exit(1)
		}
		failures = append(failures, newFailureExternal(*externalFailures))
	}

	if len(failures) == 0 {
		plog.Infof("no failures\n")
		failures = append(failures, newFailureNop())
	}

	// By default every failure runs in order; -schedule-cases selects a custom
	// sequence of space-separated indexes into failures.
	schedule := failures
	if schedCases != nil && *schedCases != "" {
		cases := strings.Split(*schedCases, " ")
		schedule = make([]failure, len(cases))
		for i := range cases {
			caseNum := 0
			n, err := fmt.Sscanf(cases[i], "%d", &caseNum)
			if n == 0 || err != nil {
				plog.Fatalf(`couldn't parse case "%s" (%v)`, cases[i], err)
			}
			// Reject indexes outside the failure list instead of panicking on
			// the slice access below.
			if caseNum < 0 || caseNum >= len(failures) {
				plog.Fatalf("case %d is out of range (%d failure cases available)", caseNum, len(failures))
			}
			schedule[i] = failures[caseNum]
		}
	}

	scfg := stressConfig{
		rateLimiter:    rate.NewLimiter(rate.Limit(*stressQPS), *stressQPS),
		keyLargeSize:   int(*stressKeyLargeSize),
		keySize:        int(*stressKeySize),
		keySuffixRange: int(*stressKeySuffixRange),
		numLeases:      10,
		keysPerLease:   10,
	}

	t := &tester{
		failures:     schedule,
		cluster:      c,
		limit:        *limit,
		scfg:         scfg,
		stresserType: *stresserType,
		doChecks:     *consistencyCheck,
	}

	sh := statusHandler{status: &t.status}
	http.Handle("/status", sh)
	http.Handle("/metrics", prometheus.Handler())

	if *enablePprof {
		http.Handle(pprofPrefix+"/", http.HandlerFunc(pprof.Index))
		http.Handle(pprofPrefix+"/profile", http.HandlerFunc(pprof.Profile))
		http.Handle(pprofPrefix+"/symbol", http.HandlerFunc(pprof.Symbol))
		http.Handle(pprofPrefix+"/cmdline", http.HandlerFunc(pprof.Cmdline))
		http.Handle(pprofPrefix+"/trace", http.HandlerFunc(pprof.Trace))
		http.Handle(pprofPrefix+"/heap", pprof.Handler("heap"))
		http.Handle(pprofPrefix+"/goroutine", pprof.Handler("goroutine"))
		http.Handle(pprofPrefix+"/threadcreate", pprof.Handler("threadcreate"))
		http.Handle(pprofPrefix+"/block", pprof.Handler("block"))
	}

	// The HTTP server runs in the background; if it ever stops, the process
	// exits through plog.Fatal.
	go func() { plog.Fatal(http.ListenAndServe(":9028", nil)) }()

	t.runLoop()
}
// portsFromArg converts a comma separated list of ports into a slice of n
// ints. An empty arg yields n copies of defaultPort. If the list does not hold
// exactly n entries, or an entry cannot be scanned as an integer, a message is
// printed and the process exits with status 1.
func portsFromArg(arg string, n, defaultPort int) []int {
	ports := make([]int, n)
	if arg == "" {
		for i := range ports {
			ports[i] = defaultPort
		}
		return ports
	}

	fields := strings.Split(arg, ",")
	if got := len(fields); got != n {
		fmt.Printf("expected %d ports, got %d (%s)\n", n, got, arg)
		os.Exit(1)
	}
	for i, field := range fields {
		_, err := fmt.Sscanf(field, "%d", &ports[i])
		if err != nil {
			fmt.Println(err)
			os.Exit(1)
		}
	}
	return ports
}
// makeFailures builds the failure list from a comma separated list of failure
// types. "default" contributes the built-in kill, isolate and slow-network
// failures; "failpoints" contributes whatever failpointFailures finds for the
// cluster. Any other type is logged as an error and the process exits with
// status 1.
func makeFailures(types string, c *cluster) []failure {
	var failures []failure

	for _, kind := range strings.Split(types, ",") {
		switch kind {
		case "default":
			failures = append(failures,
				newFailureKillAll(),
				newFailureKillMajority(),
				newFailureKillOne(),
				newFailureKillLeader(),
				newFailureKillOneForLongTime(),
				newFailureKillLeaderForLongTime(),
				newFailureIsolate(),
				newFailureIsolateAll(),
				newFailureSlowNetworkOneMember(),
				newFailureSlowNetworkLeader(),
				newFailureSlowNetworkAll(),
			)

		case "failpoints":
			found, fperr := failpointFailures(c)
			if len(found) == 0 {
				plog.Infof("no failpoints found (%v)", fperr)
			}
			failures = append(failures, found...)

		default:
			plog.Errorf("unknown failure: %s\n", kind)
			os.Exit(1)
		}
	}

	return failures
}

View file

@ -0,0 +1,182 @@
// Copyright 2016 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package main
import (
"fmt"
"net"
"net/url"
"time"
"golang.org/x/net/context"
"google.golang.org/grpc"
"github.com/coreos/etcd/clientv3"
pb "github.com/coreos/etcd/etcdserver/etcdserverpb"
"github.com/coreos/etcd/tools/functional-tester/etcd-agent/client"
)
// member describes one etcd cluster member managed through an agent.
type member struct {
	Agent    client.Agent
	Endpoint string
	Name     string
	// ClientURL and PeerURL are used for both the listen and the advertise
	// URL flags (see Flags).
	ClientURL    string
	PeerURL      string
	FailpointURL string
}
// ClusterEntry returns the member as a "name=peerURL" pair.
func (m *member) ClusterEntry() string {
	entry := m.Name + "="
	entry += m.PeerURL
	return entry
}
// Flags returns the etcd command-line flags for this member: its name, its
// client URL for both listening and advertising, its peer URL for both
// listening and initial advertising, and an initial cluster state of "new".
func (m *member) Flags() []string {
	clientURL, peerURL := m.ClientURL, m.PeerURL
	flags := []string{
		"--name", m.Name,
		"--listen-client-urls", clientURL,
		"--advertise-client-urls", clientURL,
		"--listen-peer-urls", peerURL,
		"--initial-advertise-peer-urls", peerURL,
		"--initial-cluster-state", "new",
	}
	return flags
}
// CheckCompact verifies that the member has compacted at revision rev. It
// opens a watch starting at rev-1 with a 5 second timeout and expects the
// first response to carry CompactRevision == rev. It returns an error if the
// client cannot be created, the watch channel closes, or the revision differs.
func (m *member) CheckCompact(rev int64) error {
	cli, err := m.newClientV3()
	if err != nil {
		return fmt.Errorf("%v (endpoint %s)", err, m.ClientURL)
	}
	defer cli.Close()

	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
	watchCh := cli.Watch(ctx, "\x00", clientv3.WithFromKey(), clientv3.WithRev(rev-1))
	first, open := <-watchCh
	cancel()

	switch {
	case !open:
		return fmt.Errorf("watch channel terminated (endpoint %s)", m.ClientURL)
	case first.CompactRevision != rev:
		return fmt.Errorf("got compact revision %v, wanted %v (endpoint %s)", first.CompactRevision, rev, m.ClientURL)
	}
	return nil
}
// Defrag defragments the member through a v3 client, logging before and after.
// It returns any error from creating the client or from the defragment call.
func (m *member) Defrag() error {
	plog.Printf("defragmenting %s\n", m.ClientURL)

	cli, err := m.newClientV3()
	if err != nil {
		return err
	}
	defer cli.Close()

	_, err = cli.Defragment(context.Background(), m.ClientURL)
	if err != nil {
		return err
	}

	plog.Printf("defragmented %s\n", m.ClientURL)
	return nil
}
// RevHash asks the member's maintenance API for its hash, with a 5 second
// timeout, and returns the revision from the response header together with
// the hash. On any error both values are zero.
func (m *member) RevHash() (int64, int64, error) {
	conn, err := m.dialGRPC()
	if err != nil {
		return 0, 0, err
	}
	// Deferred calls run in reverse order: the context is canceled first and
	// the connection is closed after it.
	defer conn.Close()

	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
	defer cancel()

	resp, err := pb.NewMaintenanceClient(conn).Hash(ctx, &pb.HashRequest{}, grpc.FailFast(false))
	if err != nil {
		return 0, 0, err
	}
	return resp.Header.Revision, int64(resp.Hash), nil
}
// Rev returns the revision reported in the header of the member's status
// response, or 0 and the error if the client or the status call fails.
func (m *member) Rev(ctx context.Context) (int64, error) {
	cli, err := m.newClientV3()
	if err != nil {
		return 0, err
	}
	defer cli.Close()

	status, statusErr := cli.Status(ctx, m.ClientURL)
	if statusErr != nil {
		return 0, statusErr
	}
	return status.Header.Revision, nil
}
// IsLeader reports whether the member is the leader, i.e. whether the member
// ID in its status response header equals the leader ID in that response.
func (m *member) IsLeader() (bool, error) {
	cli, err := m.newClientV3()
	if err != nil {
		return false, err
	}
	defer cli.Close()

	status, statusErr := cli.Status(context.Background(), m.ClientURL)
	if statusErr != nil {
		return false, statusErr
	}
	isLeader := status.Header.MemberId == status.Leader
	return isLeader, nil
}
// SetHealthKeyV3 writes the key "health" with value "good" through a v3
// client, using a 5 second timeout. Errors are returned annotated with the
// member's client URL.
func (m *member) SetHealthKeyV3() error {
	cli, err := m.newClientV3()
	if err != nil {
		return fmt.Errorf("%v (%s)", err, m.ClientURL)
	}
	defer cli.Close()

	// The timeout is generous because expensive requests (range/delete) may
	// still be pending on the member.
	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
	_, putErr := cli.Put(ctx, "health", "good")
	cancel()
	if putErr == nil {
		return nil
	}
	return fmt.Errorf("%v (%s)", putErr, m.ClientURL)
}
// newClientV3 creates a v3 client whose only endpoint is the member's client
// URL, with a 5 second dial timeout.
func (m *member) newClientV3() (*clientv3.Client, error) {
	cfg := clientv3.Config{
		Endpoints:   []string{m.ClientURL},
		DialTimeout: 5 * time.Second,
	}
	return clientv3.New(cfg)
}
// dialGRPC opens an insecure, blocking gRPC connection to the member's client
// address with a 5 second dial timeout.
func (m *member) dialGRPC() (*grpc.ClientConn, error) {
	addr := m.grpcAddr()
	return grpc.Dial(
		addr,
		grpc.WithInsecure(),
		grpc.WithTimeout(5*time.Second),
		grpc.WithBlock(),
	)
}
// grpcAddr returns the host part of the member's client URL, which is the form
// grpc.Dial() accepts. It panics if the client URL cannot be parsed.
func (m *member) grpcAddr() string {
	parsed, parseErr := url.Parse(m.ClientURL)
	if parseErr == nil {
		return parsed.Host
	}
	panic(parseErr)
}
// peerPort extracts the numeric port from the member's peer URL. It panics if
// the URL cannot be parsed, has no host:port pair, or the port is not an
// integer.
func (m *member) peerPort() (port int) {
	peer, err := url.Parse(m.PeerURL)
	if err != nil {
		panic(err)
	}
	_, rawPort, err := net.SplitHostPort(peer.Host)
	if err != nil {
		panic(err)
	}
	_, err = fmt.Sscanf(rawPort, "%d", &port)
	if err != nil {
		panic(err)
	}
	return port
}

View file

@ -0,0 +1,64 @@
// Copyright 2016 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package main
import (
"github.com/prometheus/client_golang/prometheus"
)
// Prometheus counters exported by the functional tester.
//
// NOTE(review): the Subsystem value "funcational_tester" looks like a typo of
// "functional_tester". It is part of the options the metrics are created with,
// so renaming it would likely rename the exported metrics; confirm with
// consumers before changing it.
var (
	// caseTotalCounter counts finished test cases, labeled by "desc".
	caseTotalCounter = prometheus.NewCounterVec(
		prometheus.CounterOpts{
			Namespace: "etcd",
			Subsystem: "funcational_tester",
			Name:      "case_total",
			Help:      "Total number of finished test cases",
		},
		[]string{"desc"},
	)

	// caseFailedTotalCounter counts failed test cases, labeled by "desc".
	caseFailedTotalCounter = prometheus.NewCounterVec(
		prometheus.CounterOpts{
			Namespace: "etcd",
			Subsystem: "funcational_tester",
			Name:      "case_failed_total",
			Help:      "Total number of failed test cases",
		},
		[]string{"desc"},
	)

	// roundTotalCounter counts finished test rounds.
	roundTotalCounter = prometheus.NewCounter(
		prometheus.CounterOpts{
			Namespace: "etcd",
			Subsystem: "funcational_tester",
			Name:      "round_total",
			Help:      "Total number of finished test rounds.",
		})

	// roundFailedTotalCounter counts failed test rounds.
	roundFailedTotalCounter = prometheus.NewCounter(
		prometheus.CounterOpts{
			Namespace: "etcd",
			Subsystem: "funcational_tester",
			Name:      "round_failed_total",
			Help:      "Total number of failed test rounds.",
		})
)
// init registers the tester's counters with Prometheus, in declaration order.
func init() {
	collectors := []prometheus.Collector{
		caseTotalCounter,
		caseFailedTotalCounter,
		roundTotalCounter,
		roundFailedTotalCounter,
	}
	for _, c := range collectors {
		prometheus.MustRegister(c)
	}
}

View file

@ -0,0 +1,57 @@
// Copyright 2016 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package main
import (
"sync"
"time"
)
// Status holds the tester's progress. Round and Case are read and written
// through the accessor methods, which take mu.
type Status struct {
	Since      time.Time
	Failures   []string
	RoundLimit int

	Cluster ClusterStatus
	cluster *cluster

	mu    sync.Mutex // guards Round and Case
	Round int
	Case  int
}
// setRound stores r as the current round while holding the status lock.
func (s *Status) setRound(r int) {
	s.mu.Lock()
	s.Round = r
	s.mu.Unlock()
}
// getRound returns the current round, read while holding the status lock.
func (s *Status) getRound() int {
	s.mu.Lock()
	round := s.Round
	s.mu.Unlock()
	return round
}
// setCase stores c as the current case while holding the status lock.
func (s *Status) setCase(c int) {
	s.mu.Lock()
	s.Case = c
	s.mu.Unlock()
}
// getCase returns the current case, read while holding the status lock.
func (s *Status) getCase() int {
	s.mu.Lock()
	current := s.Case
	s.mu.Unlock()
	return current
}

View file

@ -0,0 +1,156 @@
// Copyright 2015 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package main
import (
"strings"
"sync"
"time"
"golang.org/x/time/rate"
"google.golang.org/grpc/grpclog"
)
// init installs the package logger as the gRPC library logger.
func init() {
	grpclog.SetLogger(plog)
}
// Stresser generates load against an etcd cluster and can be stopped again.
type Stresser interface {
	// Stress starts to stress the etcd cluster
	Stress() error
	// Cancel cancels the stress test on the etcd cluster
	Cancel()
	// ModifiedKeys reports the number of keys created and deleted by stresser
	ModifiedKeys() int64
	// Checker returns an invariant checker to run after the stresser has been
	// canceled. It may return nil when the stresser has nothing to check.
	Checker() Checker
}
// nopStresser is a Stresser that generates no load at all.
type nopStresser struct {
	start time.Time
	qps   int
}

// Stress does nothing and always succeeds.
func (s *nopStresser) Stress() error {
	return nil
}

// Cancel does nothing.
func (s *nopStresser) Cancel() {
}

// ModifiedKeys always reports zero modified keys.
func (s *nopStresser) ModifiedKeys() int64 {
	const none int64 = 0
	return none
}

// Checker returns nil: there is nothing to check.
func (s *nopStresser) Checker() Checker {
	return nil
}
// compositeStresser implements a Stresser that combines a slice of stressers:
// they are started one after another and canceled concurrently.
type compositeStresser struct {
	stressers []Stresser
}
// Stress starts every stresser in order. If one of them fails to start, the
// stressers that were already started are canceled and the error is returned.
// The stresser that failed is not canceled, since it never started.
func (cs *compositeStresser) Stress() error {
	for i, s := range cs.stressers {
		if err := s.Stress(); err != nil {
			// Roll back only the stressers before index i, which did start.
			for j := 0; j < i; j++ {
				cs.stressers[j].Cancel()
			}
			return err
		}
	}
	return nil
}
// Cancel cancels all stressers concurrently and returns once every one of them
// has finished canceling.
func (cs *compositeStresser) Cancel() {
	var done sync.WaitGroup
	for _, st := range cs.stressers {
		done.Add(1)
		go func(target Stresser) {
			defer done.Done()
			target.Cancel()
		}(st)
	}
	done.Wait()
}
// ModifiedKeys returns the sum of the modified-key counts of all stressers.
func (cs *compositeStresser) ModifiedKeys() (modifiedKey int64) {
	for i := range cs.stressers {
		modifiedKey += cs.stressers[i].ModifiedKeys()
	}
	return modifiedKey
}
// Checker gathers the non-nil checkers of all stressers and wraps them in a
// composite checker. It returns nil when no stresser provides a checker.
func (cs *compositeStresser) Checker() Checker {
	var collected []Checker
	for i := range cs.stressers {
		chk := cs.stressers[i].Checker()
		if chk == nil {
			continue
		}
		collected = append(collected, chk)
	}
	if len(collected) > 0 {
		return newCompositeChecker(collected)
	}
	return nil
}
// stressConfig carries the settings NewStresser copies into the stressers it
// builds.
type stressConfig struct {
	// keyLargeSize, keySize and keySuffixRange configure the key stressers.
	keyLargeSize   int
	keySize        int
	keySuffixRange int

	// numLeases and keysPerLease configure the lease stresser.
	numLeases    int
	keysPerLease int

	// rateLimiter is handed to every stresser built from this config.
	rateLimiter *rate.Limiter
}
// NewStresser creates a stresser from a comma separated list of stresser
// types. A list with more than one type yields a compositeStresser holding one
// stresser per type; a single type ("nop", "keys", "v2keys" or "lease") yields
// that stresser configured from sc and m. An unknown type panics.
func NewStresser(s string, sc *stressConfig, m *member) Stresser {
	if kinds := strings.Split(s, ","); len(kinds) > 1 {
		parts := make([]Stresser, 0, len(kinds))
		for _, kind := range kinds {
			parts = append(parts, NewStresser(kind, sc, m))
		}
		return &compositeStresser{parts}
	}

	switch s {
	case "nop":
		return &nopStresser{
			start: time.Now(),
			qps:   int(sc.rateLimiter.Limit()),
		}

	case "keys":
		// TODO: Too intensive stressers can panic etcd member with
		// 'out of memory' error. Put rate limits in server side.
		return &keyStresser{
			Endpoint:       m.grpcAddr(),
			keyLargeSize:   sc.keyLargeSize,
			keySize:        sc.keySize,
			keySuffixRange: sc.keySuffixRange,
			N:              100,
			rateLimiter:    sc.rateLimiter,
		}

	case "v2keys":
		return &v2Stresser{
			Endpoint:       m.ClientURL,
			keySize:        sc.keySize,
			keySuffixRange: sc.keySuffixRange,
			N:              100,
			rateLimiter:    sc.rateLimiter,
		}

	case "lease":
		return &leaseStresser{
			endpoint:     m.grpcAddr(),
			numLeases:    sc.numLeases,
			keysPerLease: sc.keysPerLease,
			rateLimiter:  sc.rateLimiter,
		}

	default:
		plog.Panicf("unknown stresser type: %s\n", s)
	}

	return nil // never reach here
}

View file

@ -0,0 +1,265 @@
// Copyright 2015 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package main
import (
"fmt"
"time"
)
// tester drives the functional test: it runs the scheduled failures against
// the cluster, round after round.
type tester struct {
	cluster *cluster
	// limit is the number of rounds to run; -1 means no limit.
	limit int
	// failures is the ordered list of failure cases run in each round.
	failures []failure
	status   Status
	// currentRevision is the revision last recorded by updateRevision.
	currentRevision int64

	stresserType string
	scfg         stressConfig
	doChecks     bool

	stresser Stresser
	checker  Checker
}
// compactQPS is the rough number of entries etcd is assumed to compact per
// second; it is used to scale the compaction timeout.
// Previous tests showed etcd can compact about 60,000 entries per second.
const compactQPS = 50000
// runLoop is the tester's main loop. It initializes the status, starts the
// stressers and checkers, and then runs rounds until tt.limit rounds have been
// executed (forever when the limit is -1). After each successful round it
// compacts the cluster, and every 500 rounds it defragments it. It returns
// early when the initial reset, a cleanup, or a defragmentation fails.
func (tt *tester) runLoop() {
	tt.status.Since = time.Now()
	tt.status.RoundLimit = tt.limit
	tt.status.cluster = tt.cluster
	for _, f := range tt.failures {
		tt.status.Failures = append(tt.status.Failures, f.Desc())
	}

	if err := tt.resetStressCheck(); err != nil {
		plog.Errorf("%s failed to start stresser (%v)", tt.logPrefix(), err)
		return
	}

	// preModifiedKey is the stresser's modified-key count at the end of the
	// previous round, used to compute how many keys this round modified.
	var preModifiedKey int64
	for round := 0; round < tt.limit || tt.limit == -1; round++ {
		tt.status.setRound(round)
		roundTotalCounter.Inc()

		if err := tt.doRound(round); err != nil {
			plog.Warningf("%s functional-tester returning with error (%v)", tt.logPrefix(), err)
			if tt.cleanup() != nil {
				return
			}
			// reset preModifiedKey after clean up
			preModifiedKey = 0
			continue
		}
		// -1 so that logPrefix doesn't print out 'case'
		tt.status.setCase(-1)

		// Compact up to 10000 revisions behind the current one.
		revToCompact := max(0, tt.currentRevision-10000)
		currentModifiedKey := tt.stresser.ModifiedKeys()
		modifiedKey := currentModifiedKey - preModifiedKey
		preModifiedKey = currentModifiedKey
		// Base timeout of 10s plus one second per compactQPS modified keys.
		timeout := 10 * time.Second
		timeout += time.Duration(modifiedKey/compactQPS) * time.Second
		plog.Infof("%s compacting %d modifications (timeout %v)", tt.logPrefix(), modifiedKey, timeout)
		if err := tt.compact(revToCompact, timeout); err != nil {
			plog.Warningf("%s functional-tester compact got error (%v)", tt.logPrefix(), err)
			if tt.cleanup() != nil {
				return
			}
			// reset preModifiedKey after clean up
			preModifiedKey = 0
			// NOTE(review): unlike the doRound failure path there is no
			// continue here, so the defrag check below still runs after a
			// failed compaction. Confirm this is intended.
		}
		if round > 0 && round%500 == 0 { // every 500 rounds
			if err := tt.defrag(); err != nil {
				plog.Warningf("%s functional-tester returning with error (%v)", tt.logPrefix(), err)
				return
			}
		}
	}

	plog.Infof("%s functional-tester is finished", tt.logPrefix())
}
// doRound runs every configured failure case once for the given round. For
// each case it waits for cluster health, injects the failure, recovers from
// it, stops the stressers, waits for health again and finally checks
// consistency (which restarts the stressers on success). The first error
// aborts the round and is returned wrapped with the step that failed.
func (tt *tester) doRound(round int) error {
	for caseIdx, fc := range tt.failures {
		caseTotalCounter.WithLabelValues(fc.Desc()).Inc()
		tt.status.setCase(caseIdx)

		if err := tt.cluster.WaitHealth(); err != nil {
			return fmt.Errorf("wait full health error: %v", err)
		}

		plog.Infof("%s injecting failure %q", tt.logPrefix(), fc.Desc())
		if err := fc.Inject(tt.cluster, round); err != nil {
			return fmt.Errorf("injection error: %v", err)
		}
		plog.Infof("%s injected failure", tt.logPrefix())

		plog.Infof("%s recovering failure %q", tt.logPrefix(), fc.Desc())
		if err := fc.Recover(tt.cluster, round); err != nil {
			return fmt.Errorf("recovery error: %v", err)
		}
		plog.Infof("%s recovered failure", tt.logPrefix())

		tt.cancelStresser()

		plog.Infof("%s wait until cluster is healthy", tt.logPrefix())
		if err := tt.cluster.WaitHealth(); err != nil {
			return fmt.Errorf("wait full health error: %v", err)
		}
		plog.Infof("%s cluster is healthy", tt.logPrefix())

		plog.Infof("%s checking consistency and invariant of cluster", tt.logPrefix())
		if err := tt.checkConsistency(); err != nil {
			return fmt.Errorf("tt.checkConsistency error (%v)", err)
		}
		plog.Infof("%s checking consistency and invariant of cluster done", tt.logPrefix())

		plog.Infof("%s succeed!", tt.logPrefix())
	}
	return nil
}
// updateRevision stores one of the revisions reported by the cluster in
// tt.currentRevision and returns the error from getRevisionHash. The revision
// is taken from whatever getRevisionHash returned even when the error is
// non-nil; when nothing was returned the previous value is kept.
func (tt *tester) updateRevision() error {
	revisions, _, err := tt.cluster.getRevisionHash()
	// any single revision will do, so take the first one yielded and stop
	for _, r := range revisions {
		tt.currentRevision = r
		break
	}
	plog.Infof("%s updated current revision to %d", tt.logPrefix(), tt.currentRevision)
	return err
}
// checkConsistency runs the configured checker. When the check passes it
// refreshes tt.currentRevision and restarts the stressers; a failure in any
// of the three steps is returned and the later steps are skipped.
func (tt *tester) checkConsistency() (err error) {
	if err = tt.checker.Check(); err != nil {
		plog.Infof("%s %v", tt.logPrefix(), err)
		return err
	}
	if err = tt.updateRevision(); err != nil {
		plog.Warningf("%s functional-tester returning with tt.updateRevision error (%v)", tt.logPrefix(), err)
		return err
	}
	return tt.startStresser()
}
// compact stops the stressers, compacts the cluster's KV store up to rev
// within timeout, and verifies the compaction. The stressers are restarted
// only when both steps succeed; otherwise the failing step's error is
// returned and the stressers stay stopped.
func (tt *tester) compact(rev int64, timeout time.Duration) (err error) {
	tt.cancelStresser()

	plog.Infof("%s compacting storage (current revision %d, compact revision %d)", tt.logPrefix(), tt.currentRevision, rev)
	if err = tt.cluster.compactKV(rev, timeout); err != nil {
		return err
	}
	plog.Infof("%s compacted storage (compact revision %d)", tt.logPrefix(), rev)

	plog.Infof("%s checking compaction (compact revision %d)", tt.logPrefix(), rev)
	if err = tt.cluster.checkCompact(rev); err != nil {
		plog.Warningf("%s checkCompact error (%v)", tt.logPrefix(), err)
		return err
	}
	plog.Infof("%s confirmed compaction (compact revision %d)", tt.logPrefix(), rev)

	return tt.startStresser()
}
// defrag defragments the cluster. On failure it runs cleanup and returns the
// defrag error, combined with the cleanup error when cleanup fails as well.
func (tt *tester) defrag() error {
	plog.Infof("%s defragmenting...", tt.logPrefix())
	err := tt.cluster.defrag()
	if err == nil {
		plog.Infof("%s defragmented...", tt.logPrefix())
		return nil
	}

	plog.Warningf("%s defrag error (%v)", tt.logPrefix(), err)
	if cerr := tt.cleanup(); cerr != nil {
		return fmt.Errorf("%s, %s", err, cerr)
	}
	return err
}
// logPrefix returns the "[round#R case#C]" tag put in front of log lines.
// The case part is omitted when the current case is -1.
func (tt *tester) logPrefix() string {
	round, caseIdx := tt.status.getRound(), tt.status.getCase()
	if caseIdx == -1 {
		return fmt.Sprintf("[round#%d]", round)
	}
	return fmt.Sprintf("[round#%d case#%d]", round, caseIdx)
}
// cleanup records a failed round and failed case in the metrics, stops the
// stressers, cleans up and resets the cluster, and then rebuilds and restarts
// the stressers and checkers. The first failing step's error is returned.
func (tt *tester) cleanup() error {
	roundFailedTotalCounter.Inc()

	// Case -1 means the failure happened outside a failure case, i.e. while
	// compacting or defragmenting.
	var desc string
	if c := tt.status.Case; c == -1 {
		desc = "compact/defrag"
	} else {
		desc = tt.failures[c].Desc()
	}
	caseFailedTotalCounter.WithLabelValues(desc).Inc()

	tt.cancelStresser()

	err := tt.cluster.Cleanup()
	if err != nil {
		plog.Warningf("%s cleanup error: %v", tt.logPrefix(), err)
		return err
	}
	err = tt.cluster.Reset()
	if err != nil {
		plog.Warningf("%s cleanup Bootstrap error: %v", tt.logPrefix(), err)
		return err
	}
	return tt.resetStressCheck()
}
// cancelStresser stops the current stresser through its Cancel method,
// logging before and after the call.
func (tt *tester) cancelStresser() {
plog.Infof("%s canceling the stressers...", tt.logPrefix())
tt.stresser.Cancel()
plog.Infof("%s canceled stressers", tt.logPrefix())
}
// startStresser starts the current stresser and returns the error from its
// Stress method. The "started" line is logged whether or not Stress failed.
func (tt *tester) startStresser() (err error) {
	plog.Infof("%s starting the stressers...", tt.logPrefix())
	stressErr := tt.stresser.Stress()
	plog.Infof("%s started stressers", tt.logPrefix())
	return stressErr
}
// resetStressCheck builds a fresh composite stresser with one stresser per
// cluster member, installs the matching checker, and starts the stressers.
// With doChecks disabled the checker is a no-op; otherwise it is a hash
// checker, combined with the stressers' own checker when they provide one.
func (tt *tester) resetStressCheck() error {
	plog.Infof("%s resetting stressers and checkers...", tt.logPrefix())

	cs := &compositeStresser{}
	for _, member := range tt.cluster.Members {
		cs.stressers = append(cs.stressers, NewStresser(tt.stresserType, &tt.scfg, member))
	}
	tt.stresser = cs

	if tt.doChecks {
		chk := newHashChecker(hashAndRevGetter(tt.cluster))
		if schk := cs.Checker(); schk != nil {
			chk = newCompositeChecker([]Checker{chk, schk})
		}
		tt.checker = chk
	} else {
		tt.checker = newNoChecker()
	}

	return tt.startStresser()
}
// Report returns the current stresser's modified-key count.
func (tt *tester) Report() int64 { return tt.stresser.ModifiedKeys() }

View file

@ -0,0 +1,51 @@
// Copyright 2016 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package main
import (
"fmt"
"strings"
)
// getSameValue reports whether every value in vals is identical. An empty or
// nil map is considered uniform and yields true.
//
// The first value seen is tracked with an explicit flag rather than by
// treating 0 as "unset": with the zero sentinel a map such as {0, 5} could be
// reported as uniform, depending on map iteration order.
func getSameValue(vals map[string]int64) bool {
	var (
		first int64
		seen  bool
	)
	for _, v := range vals {
		if !seen {
			first, seen = v, true
			continue
		}
		if v != first {
			return false
		}
	}
	return true
}
// max returns the larger of n1 and n2.
func max(n1, n2 int64) int64 {
	if n2 >= n1 {
		return n2
	}
	return n1
}
// errsToError folds errs into a single error whose message is the individual
// messages joined with ", ". It returns nil when errs is empty.
func errsToError(errs []error) error {
	if len(errs) == 0 {
		return nil
	}
	msgs := make([]string, len(errs))
	for i, err := range errs {
		msgs[i] = err.Error()
	}
	// Pass the joined text as an argument, never as the format string:
	// a '%' inside one of the messages would otherwise be interpreted as a
	// formatting verb and garble the result.
	return fmt.Errorf("%s", strings.Join(msgs, ", "))
}

View file

@ -0,0 +1,113 @@
// Copyright 2016 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package main
import (
"context"
"fmt"
"math/rand"
"net"
"net/http"
"sync"
"sync/atomic"
"time"
"golang.org/x/time/rate"
clientV2 "github.com/coreos/etcd/client"
)
// v2Stresser writes random keys to a single endpoint through the v2 client
// API from N concurrent goroutines.
type v2Stresser struct {
// Endpoint is the only endpoint the client is configured with.
Endpoint string
// keySize is the length in bytes of each random value written.
keySize int
// keySuffixRange bounds the key space: keys are "foo" followed by a
// 16-digit hex number drawn from [0, keySuffixRange).
keySuffixRange int
// N is the number of writer goroutines started by Stress; it is also used
// as the transport's MaxIdleConnsPerHost.
N int
// rateLimiter is waited on before every write.
rateLimiter *rate.Limiter
// wg tracks the writer goroutines so Cancel can wait for them to exit.
wg sync.WaitGroup
// atomicModifiedKey counts successful writes; it is accessed atomically.
atomicModifiedKey int64
// cancel cancels the context shared by the writer goroutines; set by Stress.
cancel func()
}
// Stress creates a v2 client for s.Endpoint and starts s.N goroutines that
// write keys until Cancel is called. It returns an error only when the
// client cannot be created.
func (s *v2Stresser) Stress() error {
	dialer := &net.Dialer{
		Timeout:   time.Second,
		KeepAlive: 30 * time.Second,
	}
	client, err := clientV2.New(clientV2.Config{
		Endpoints: []string{s.Endpoint},
		Transport: &http.Transport{
			Dial:                dialer.Dial,
			MaxIdleConnsPerHost: s.N,
		},
	})
	if err != nil {
		return err
	}
	keys := clientV2.NewKeysAPI(client)

	// every writer shares one cancellable context; Cancel uses s.cancel to
	// stop them and s.wg to wait for them
	ctx, cancel := context.WithCancel(context.Background())
	s.cancel = cancel
	s.wg.Add(s.N)
	for worker := 0; worker < s.N; worker++ {
		go func() {
			defer s.wg.Done()
			s.run(ctx, keys)
		}()
	}
	return nil
}
// run is the body of one writer goroutine. It repeatedly waits on the rate
// limiter and sets a random key to a random value, counting each successful
// write, until the limiter wait or the write reports context.Canceled.
func (s *v2Stresser) run(ctx context.Context, kv clientV2.KeysAPI) {
	for {
		if err := s.rateLimiter.Wait(ctx); err == context.Canceled {
			return
		}

		setctx, setcancel := context.WithTimeout(ctx, clientV2.DefaultRequestTimeout)
		key := fmt.Sprintf("foo%016x", rand.Intn(s.keySuffixRange))
		val := string(randBytes(s.keySize))
		_, err := kv.Set(setctx, key, val, nil)
		setcancel()

		switch err {
		case nil:
			atomic.AddInt64(&s.atomicModifiedKey, 1)
		case context.Canceled:
			return
		}
	}
}
// Cancel cancels the writers' shared context and blocks until every writer
// goroutine has returned. s.cancel is only assigned by Stress.
func (s *v2Stresser) Cancel() {
s.cancel()
s.wg.Wait()
}
// ModifiedKeys returns the number of writes that have succeeded so far.
func (s *v2Stresser) ModifiedKeys() int64 {
return atomic.LoadInt64(&s.atomicModifiedKey)
}
// Checker returns nil: the v2 stresser supplies no checker of its own.
func (s *v2Stresser) Checker() Checker { return nil }
// randBytes returns size random bytes, each a lowercase ASCII letter 'a'-'z'.
func randBytes(size int) []byte {
	out := make([]byte, size)
	for i := range out {
		out[i] = byte(int('a') + rand.Intn(26))
	}
	return out
}

View file

@ -0,0 +1,23 @@
#!/bin/sh -e
# Builds the etcd, agent and tester binaries if needed, packages them into
# docker images, and runs the functional tester against three agents.
# Expected to be run from the etcd repository root (all paths are relative).
set -x
set -e
# 1. build etcd binaries (skipped when bin/etcd already exists)
[ -f bin/etcd ] || ./build
# 2. build agent & tester (skipped when both binaries already exist)
[ -f bin/etcd-agent -a -f bin/etcd-tester ] || ./tools/functional-tester/build
# 3. copy the binaries into the docker context and build the images (alpine based)
mkdir -p ./tools/functional-tester/docker/bin
cp -v bin/etcd-agent bin/etcd-tester bin/etcd ./tools/functional-tester/docker/bin
docker-compose -f tools/functional-tester/docker/docker-compose.yml build
# 4. create the etcd-functional network unless one with that name is already
#    listed (assumption - the subnet does not overlap an existing network)
docker network ls | grep etcd-functional || docker network create --subnet 172.20.0.0/16 etcd-functional
# 5. start the agents a1, a2, a3 detached (assumption - agents'll get first ip addresses)
docker-compose -f tools/functional-tester/docker/docker-compose.yml up -d a1 a2 a3
# 6. run tester in the foreground
docker-compose -f tools/functional-tester/docker/docker-compose.yml run tester

View file

@ -0,0 +1,21 @@
# Use goreman to run this Procfile; install it with `go get github.com/mattn/goreman`
# peer bridges
pbridge1: tools/local-tester/bridge/bridge 127.0.0.1:11111 127.0.0.1:12380
pbridge2: tools/local-tester/bridge/bridge 127.0.0.1:22222 127.0.0.1:22380
pbridge3: tools/local-tester/bridge/bridge 127.0.0.1:33333 127.0.0.1:32380
# client bridges
cbridge1: tools/local-tester/bridge/bridge 127.0.0.1:2379 127.0.0.1:11119
cbridge2: tools/local-tester/bridge/bridge 127.0.0.1:22379 127.0.0.1:22229
cbridge3: tools/local-tester/bridge/bridge 127.0.0.1:32379 127.0.0.1:33339
faults: tools/local-tester/faults.sh
stress-put: tools/benchmark/benchmark --endpoints=127.0.0.1:2379,127.0.0.1:22379,127.0.0.1:32379 --clients=27 --conns=3 put --sequential-keys --key-space-size=100000 --total=100000
etcd1: GOFAIL_HTTP="127.0.0.1:11180" bin/etcd --name infra1 --snapshot-count=1000 --listen-client-urls http://127.0.0.1:11119 --advertise-client-urls http://127.0.0.1:2379 --listen-peer-urls http://127.0.0.1:12380 --initial-advertise-peer-urls http://127.0.0.1:11111 --initial-cluster-token etcd-cluster-1 --initial-cluster 'infra1=http://127.0.0.1:11111,infra2=http://127.0.0.1:22222,infra3=http://127.0.0.1:33333' --initial-cluster-state new --enable-pprof
etcd2: GOFAIL_HTTP="127.0.0.1:22280" bin/etcd --name infra2 --snapshot-count=1000 --listen-client-urls http://127.0.0.1:22229 --advertise-client-urls http://127.0.0.1:22379 --listen-peer-urls http://127.0.0.1:22380 --initial-advertise-peer-urls http://127.0.0.1:22222 --initial-cluster-token etcd-cluster-1 --initial-cluster 'infra1=http://127.0.0.1:11111,infra2=http://127.0.0.1:22222,infra3=http://127.0.0.1:33333' --initial-cluster-state new --enable-pprof
etcd3: GOFAIL_HTTP="127.0.0.1:33380" bin/etcd --name infra3 --snapshot-count=1000 --listen-client-urls http://127.0.0.1:33339 --advertise-client-urls http://127.0.0.1:32379 --listen-peer-urls http://127.0.0.1:32380 --initial-advertise-peer-urls http://127.0.0.1:33333 --initial-cluster-token etcd-cluster-1 --initial-cluster 'infra1=http://127.0.0.1:11111,infra2=http://127.0.0.1:22222,infra3=http://127.0.0.1:33333' --initial-cluster-state new --enable-pprof
# in future, use proxy to listen on 2379
#proxy: bin/etcd --name infra-proxy1 --proxy=on --listen-client-urls http://127.0.0.1:2378 --initial-cluster 'infra1=http://127.0.0.1:12380,infra2=http://127.0.0.1:22380,infra3=http://127.0.0.1:32380' --enable-pprof

View file

@ -0,0 +1,25 @@
# etcd local-tester
The etcd local-tester runs a fault injected cluster using local processes. It sets up an etcd cluster with unreliable network bridges on its peer and client interfaces. The cluster runs with a constant stream of `Put` requests to simulate client usage. A fault injection script periodically kills cluster members and disrupts bridge connectivity.
# Requirements
local-tester depends on `goreman` to manage its processes and `bash` to run fault injection.
# Building
local-tester needs `etcd`, `benchmark`, and `bridge` binaries. To build these binaries, run the following from the etcd repository root:
```sh
./build
pushd tools/benchmark/ && go build && popd
pushd tools/local-tester/bridge && go build && popd
```
# Running
The fault injected cluster is invoked with `goreman`:
```sh
goreman -f tools/local-tester/Procfile start
```

View file

@ -0,0 +1,280 @@
// Copyright 2016 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Package main is the entry point for the local tester network bridge.
package main
import (
"flag"
"fmt"
"io"
"io/ioutil"
"log"
"math/rand"
"net"
"sync"
"time"
)
// bridgeConn pairs an accepted connection with the connection dialed to the
// forward address, plus the dispatcher used to copy data between them.
type bridgeConn struct {
// in is the accepted (listener-side) connection.
in net.Conn
// out is the connection dialed to the forward address.
out net.Conn
// d performs the copies between in and out.
d dispatcher
}
// newBridgeConn dials the forward address (the second positional command-line
// argument) and pairs the resulting connection with in. When the dial fails,
// in is closed and the dial error is returned.
func newBridgeConn(in net.Conn, d dispatcher) (*bridgeConn, error) {
	fwdAddr := flag.Args()[1]
	out, err := net.Dial("tcp", fwdAddr)
	if err != nil {
		in.Close()
		return nil, err
	}
	bc := &bridgeConn{in: in, out: out, d: d}
	return bc, nil
}
// String describes the bridge as "<in remote address> <-> <out remote address>".
func (b *bridgeConn) String() string {
	inAddr, outAddr := b.in.RemoteAddr(), b.out.RemoteAddr()
	return fmt.Sprintf("%v <-> %v", inAddr, outAddr)
}
// Close closes both sides of the bridge; close errors are ignored.
func (b *bridgeConn) Close() {
b.in.Close()
b.out.Close()
}
// bridge forwards traffic in both directions: in -> out in a new goroutine,
// out -> in on the calling goroutine. It returns when the out -> in copy
// stops; it does not close either connection.
func bridge(b *bridgeConn) {
	log.Println("bridging", b.String())
	fromIn, fromOut := makeFetch(b.in), makeFetch(b.out)
	go b.d.Copy(b.out, fromIn)
	b.d.Copy(b.in, fromOut)
}
// timeBridge bridges the connection normally but closes both sides after a
// random delay of 1 to 5 seconds.
func timeBridge(b *bridgeConn) {
	go func() {
		lifetime := time.Duration(rand.Intn(5)+1) * time.Second
		time.Sleep(lifetime)
		log.Printf("killing connection %s after %v\n", b.String(), lifetime)
		b.Close()
	}()
	bridge(b)
}
// blackhole reads and discards everything arriving on the accepted
// connection, forwarding nothing, and closes the bridge once that read ends.
func blackhole(b *bridgeConn) {
log.Println("blackholing connection", b.String())
io.Copy(ioutil.Discard, b.in)
b.Close()
}
// readRemoteOnly copies data only from the forward connection (out) to the
// accepted connection (in); nothing is forwarded in the other direction.
// The connections are not closed when the copy ends.
func readRemoteOnly(b *bridgeConn) {
log.Println("one way (<-)", b.String())
b.d.Copy(b.in, makeFetch(b.out))
}
// writeRemoteOnly copies data only from the accepted connection (in) to the
// forward connection (out); nothing is forwarded in the other direction.
// The connections are not closed when the copy ends.
func writeRemoteOnly(b *bridgeConn) {
log.Println("one way (->)", b.String())
b.d.Copy(b.out, makeFetch(b.in))
}
// corruptReceive bridges both directions but passes the out -> in stream
// (data received from the forward address) through makeFetchCorrupt.
// The corrupted direction runs in its own goroutine.
func corruptReceive(b *bridgeConn) {
log.Println("corruptReceive", b.String())
go b.d.Copy(b.in, makeFetchCorrupt(makeFetch(b.out)))
b.d.Copy(b.out, makeFetch(b.in))
}
// corruptSend bridges both directions but passes the in -> out stream
// (data sent to the forward address) through makeFetchCorrupt.
// The corrupted direction runs in its own goroutine.
func corruptSend(b *bridgeConn) {
log.Println("corruptSend", b.String())
go b.d.Copy(b.out, makeFetchCorrupt(makeFetch(b.in)))
b.d.Copy(b.in, makeFetch(b.out))
}
// makeFetch adapts c to a fetchFunc. Every call reads up to 4096 bytes into a
// freshly allocated buffer and returns the bytes that were read.
//
// When Read returns data together with an error, the data is returned first
// and the error is left for a later call to report. The io.Reader contract
// allows n > 0 alongside a non-nil error (for example the final chunk
// delivered with io.EOF); returning only the error would drop those bytes.
func makeFetch(c io.Reader) fetchFunc {
	return func() ([]byte, error) {
		buf := make([]byte, 4096)
		n, err := c.Read(buf)
		if n > 0 {
			return buf[:n], nil
		}
		if err != nil {
			return nil, err
		}
		return buf[:0], nil
	}
}
// makeFetchCorrupt wraps f so that each fetched byte is incremented with
// probability 1/16384, i.e. roughly one corrupted byte per 16K of data.
// Errors from f are passed through with a nil buffer.
func makeFetchCorrupt(f func() ([]byte, error)) fetchFunc {
	return func() ([]byte, error) {
		data, err := f()
		if err != nil {
			return nil, err
		}
		// corrupt one byte approximately every 16K
		for i := range data {
			if rand.Intn(16*1024) == 0 {
				data[i]++
			}
		}
		return data, nil
	}
}
// makeFetchRand wraps f so that each call fails with "fetchRand: done" with
// probability 1/10 without calling f. Otherwise it returns f's data, or a nil
// buffer and f's error.
func makeFetchRand(f func() ([]byte, error)) fetchFunc {
	return func() ([]byte, error) {
		if rand.Intn(10) == 0 {
			return nil, fmt.Errorf("fetchRand: done")
		}
		data, err := f()
		if err == nil {
			return data, nil
		}
		return nil, err
	}
}
// randomBlackhole bridges both directions through makeFetchRand, so each
// direction stops at a random point. Once both copies have ended the bridge
// is closed.
func randomBlackhole(b *bridgeConn) {
	log.Println("random blackhole: connection", b.String())

	var wg sync.WaitGroup
	wg.Add(2)
	go func() {
		defer wg.Done()
		b.d.Copy(b.in, makeFetchRand(makeFetch(b.out)))
	}()
	go func() {
		defer wg.Done()
		b.d.Copy(b.out, makeFetchRand(makeFetch(b.in)))
	}()
	wg.Wait()

	b.Close()
}
// config holds the command-line switches of the bridge. Each bool enables one
// fault; main registers all of them with a default of true.
type config struct {
// accept faults, run before accepting a connection
delayAccept bool
resetListen bool
// connFaultRate is the fraction of connections that are eligible for a
// connection fault (flag default 0.25).
connFaultRate float64
// connection faults, applied to an accepted connection
immediateClose bool
blackhole bool
timeClose bool
writeRemoteOnly bool
readRemoteOnly bool
randomBlackhole bool
corruptSend bool
corruptReceive bool
// reorder selects the pooled (reordering) dispatcher instead of the
// immediate one.
reorder bool
}
// acceptFaultFunc is a fault that main runs before each Accept call.
type acceptFaultFunc func()
// connFaultFunc handles one accepted connection, either bridging it normally
// or applying a fault to it.
type connFaultFunc func(*bridgeConn)
// main parses the fault flags and the two positional arguments
// (<listen address> <forward address>), then accepts connections forever.
// Before each accept a randomly chosen accept fault runs; each accepted
// connection is bridged to the forward address either normally or through a
// randomly chosen connection fault, in its own goroutine.
func main() {
	var cfg config

	flag.BoolVar(&cfg.delayAccept, "delay-accept", true, "delays accepting new connections")
	flag.BoolVar(&cfg.resetListen, "reset-listen", true, "resets the listening port")

	flag.Float64Var(&cfg.connFaultRate, "conn-fault-rate", 0.25, "rate of faulty connections")
	flag.BoolVar(&cfg.immediateClose, "immediate-close", true, "close after accept")
	flag.BoolVar(&cfg.blackhole, "blackhole", true, "reads nothing, writes go nowhere")
	flag.BoolVar(&cfg.timeClose, "time-close", true, "close after random time")
	flag.BoolVar(&cfg.writeRemoteOnly, "write-remote-only", true, "only write, no read")
	flag.BoolVar(&cfg.readRemoteOnly, "read-remote-only", true, "only read, no write")
	flag.BoolVar(&cfg.randomBlackhole, "random-blackhole", true, "blackhole after data xfer")
	flag.BoolVar(&cfg.corruptReceive, "corrupt-receive", true, "corrupt packets received from destination")
	flag.BoolVar(&cfg.corruptSend, "corrupt-send", true, "corrupt packets sent to destination")
	flag.BoolVar(&cfg.reorder, "reorder", true, "reorder packet delivery")
	flag.Parse()

	// Both positional arguments are required; exit with a usage message
	// rather than panicking on an out-of-range index.
	if flag.NArg() < 2 {
		log.Fatal("usage: bridge [flags] <listen address> <forward address>")
	}
	lAddr := flag.Args()[0]
	fwdAddr := flag.Args()[1]
	log.Println("listening on ", lAddr)
	log.Println("forwarding to ", fwdAddr)
	l, err := net.Listen("tcp", lAddr)
	if err != nil {
		log.Fatal(err)
	}
	defer l.Close()

	// Index 0 is the no-op accept fault.
	acceptFaults := []acceptFaultFunc{func() {}}
	if cfg.delayAccept {
		f := func() {
			log.Println("delaying accept")
			time.Sleep(3 * time.Second)
		}
		acceptFaults = append(acceptFaults, f)
	}
	if cfg.resetListen {
		f := func() {
			log.Println("reset listen port")
			l.Close()
			newListener, err := net.Listen("tcp", lAddr)
			if err != nil {
				log.Fatal(err)
			}
			l = newListener
		}
		acceptFaults = append(acceptFaults, f)
	}

	// Index 0 is the plain, fault-free bridge.
	connFaults := []connFaultFunc{func(b *bridgeConn) { bridge(b) }}
	if cfg.immediateClose {
		f := func(b *bridgeConn) {
			log.Printf("terminating connection %s immediately", b.String())
			b.Close()
		}
		connFaults = append(connFaults, f)
	}
	if cfg.blackhole {
		connFaults = append(connFaults, blackhole)
	}
	if cfg.timeClose {
		connFaults = append(connFaults, timeBridge)
	}
	if cfg.writeRemoteOnly {
		connFaults = append(connFaults, writeRemoteOnly)
	}
	if cfg.readRemoteOnly {
		connFaults = append(connFaults, readRemoteOnly)
	}
	if cfg.randomBlackhole {
		connFaults = append(connFaults, randomBlackhole)
	}
	if cfg.corruptSend {
		connFaults = append(connFaults, corruptSend)
	}
	if cfg.corruptReceive {
		connFaults = append(connFaults, corruptReceive)
	}

	var disp dispatcher
	if cfg.reorder {
		disp = newDispatcherPool()
	} else {
		disp = newDispatcherImmediate()
	}

	for {
		acceptFaults[rand.Intn(len(acceptFaults))]()
		conn, err := l.Accept()
		if err != nil {
			log.Fatal(err)
		}

		r := rand.Intn(len(connFaults))
		// rand.Intn(100) is uniform over 0..99, so ">=" makes exactly
		// connFaultRate of the connections fault-eligible. The earlier ">"
		// left one value too many eligible, which also meant a rate of 0
		// still injected faults.
		if rand.Intn(100) >= int(100.0*cfg.connFaultRate) {
			r = 0
		}

		bc, err := newBridgeConn(conn, disp)
		if err != nil {
			log.Printf("oops %v", err)
			continue
		}
		go connFaults[r](bc)
	}
}

View file

@ -0,0 +1,140 @@
// Copyright 2016 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package main
import (
"io"
"math/rand"
"sync"
"time"
)
var (
// dispatchPoolDelay is the time the pool's write loop sleeps between
// flushes of all buffered packets.
dispatchPoolDelay = 100 * time.Millisecond
// dispatchPacketBytes is the maximum size of one queued packet; the pool
// picks a writer at random again after each packet it writes.
dispatchPacketBytes = 32
)
// dispatcher moves data produced by a fetchFunc to a writer.
type dispatcher interface {
// Copy works like io.Copy using buffers provided by fetchFunc.
// It keeps fetching until an error occurs and returns that error.
Copy(io.Writer, fetchFunc) error
}
// fetchFunc returns the next chunk of data to forward, or an error when no
// more data can be fetched.
type fetchFunc func() ([]byte, error)
// dispatcherPool is a dispatcher that queues packets from all of its Copy
// calls and writes them out in periodic flushes, randomly interleaving the
// destination writers.
type dispatcherPool struct {
// mu protects the dispatch packet queue 'q'
mu sync.Mutex
q []dispatchPacket
}
// dispatchPacket is one queued chunk of data together with its destination.
type dispatchPacket struct {
buf []byte
out io.Writer
}
// newDispatcherPool returns a pooled dispatcher and starts its write loop in
// a new goroutine. The loop has no exit condition.
func newDispatcherPool() dispatcher {
d := &dispatcherPool{}
go d.writeLoop()
return d
}
// writeLoop flushes the queue once every dispatchPoolDelay, forever.
func (d *dispatcherPool) writeLoop() {
for {
time.Sleep(dispatchPoolDelay)
d.flush()
}
}
// flush takes every queued packet and writes it out. Packets for the same
// writer keep their queue order, but the writer served next is chosen at
// random before every single write. Write errors are ignored.
func (d *dispatcherPool) flush() {
	d.mu.Lock()
	pending := d.q
	d.q = nil
	d.mu.Unlock()
	if len(pending) == 0 {
		return
	}

	// sort by sockets; preserve the packet ordering within a socket
	queues := make(map[io.Writer][]dispatchPacket)
	writers := []io.Writer{}
	for _, p := range pending {
		if _, seen := queues[p.out]; !seen {
			writers = append(writers, p.out)
		}
		queues[p.out] = append(queues[p.out], p)
	}

	// send all packets, one per iteration
	for len(writers) != 0 {
		// randomize writer on every write
		i := rand.Intn(len(writers))
		w := writers[i]
		head := queues[w][0]
		head.out.Write(head.buf)

		// dequeue packet; forget the writer once it has nothing left
		if rest := queues[w][1:]; len(rest) != 0 {
			queues[w] = rest
			continue
		}
		delete(queues, w)
		writers = append(writers[:i], writers[i+1:]...)
	}
}
// Copy repeatedly fetches data from f, cuts it into packets of at most
// dispatchPacketBytes bytes addressed to w, and appends them to the pool's
// queue for the write loop to send. It returns the first error from f.
func (d *dispatcherPool) Copy(w io.Writer, f fetchFunc) error {
	for {
		data, err := f()
		if err != nil {
			return err
		}

		batch := []dispatchPacket{}
		// full-sized packets first, then whatever remains
		for len(data) > dispatchPacketBytes {
			batch = append(batch, dispatchPacket{data[:dispatchPacketBytes], w})
			data = data[dispatchPacketBytes:]
		}
		if len(data) > 0 {
			batch = append(batch, dispatchPacket{data, w})
		}

		d.mu.Lock()
		d.q = append(d.q, batch...)
		d.mu.Unlock()
	}
}
// dispatcherImmediate is a dispatcher that writes each fetched chunk to the
// destination right away, without queueing.
type dispatcherImmediate struct{}
// newDispatcherImmediate returns a dispatcher that forwards data immediately.
func newDispatcherImmediate() dispatcher {
return &dispatcherImmediate{}
}
// Copy writes every chunk fetched from f straight to w. It returns the first
// error reported by either f or w.
func (d *dispatcherImmediate) Copy(w io.Writer, f fetchFunc) error {
	for {
		chunk, fetchErr := f()
		if fetchErr != nil {
			return fetchErr
		}
		_, writeErr := w.Write(chunk)
		if writeErr != nil {
			return writeErr
		}
	}
}

View file

@ -0,0 +1,108 @@
#!/bin/bash
# PROCFILE is the goreman Procfile passed to every goreman invocation below.
PROCFILE="tools/local-tester/Procfile"
# HTTPFAIL lists the failpoint HTTP endpoints this script queries and activates.
HTTPFAIL=(127.0.0.1:11180 127.0.0.1:22280 127.0.0.1:33380)
# wait_time prints a random integer between 1 and 10 (inclusive).
function wait_time {
	echo $(( RANDOM % 10 + 1 ))
}
# cycle <proc>...: for each named goreman process in turn, stop it, sleep a
# random 1-10 seconds, then restart it. A failed stop or restart is reported
# but does not abort the loop.
function cycle {
for a; do
echo "cycling $a"
goreman -f $PROCFILE run stop $a || echo "could not stop $a"
sleep `wait_time`s
goreman -f $PROCFILE run restart $a || echo "could not restart $a"
done
}
# cycle_members cycles the three etcd processes one after another.
function cycle_members {
cycle etcd1 etcd2 etcd3
}
# cycle_pbridge cycles the three peer bridges one after another.
function cycle_pbridge {
cycle pbridge1 pbridge2 pbridge3
}
# cycle_cbridge cycles the three client bridges one after another.
function cycle_cbridge {
cycle cbridge1 cbridge2 cbridge3
}
# cycle_stresser cycles the stress-put process.
function cycle_stresser {
cycle stress-put
}
# kill_maj stops two distinct, randomly chosen etcd processes (a majority of
# the three), sleeps a random 1-10 seconds, then restarts both.
function kill_maj {
idx="etcd"`expr $RANDOM % 3 + 1`
idx2="$idx"
# redraw until the second member differs from the first
while [ "$idx" == "$idx2" ]; do
idx2="etcd"`expr $RANDOM % 3 + 1`
done
echo "kill majority $idx $idx2"
goreman -f $PROCFILE run stop $idx || echo "could not stop $idx"
goreman -f $PROCFILE run stop $idx2 || echo "could not stop $idx2"
sleep `wait_time`s
goreman -f $PROCFILE run restart $idx || echo "could not restart $idx"
goreman -f $PROCFILE run restart $idx2 || echo "could not restart $idx2"
}
# kill_all stops all three etcd processes, sleeps a random 1-10 seconds, then
# restarts all of them.
function kill_all {
for a in etcd1 etcd2 etcd3; do
goreman -f $PROCFILE run stop $a || echo "could not stop $a"
done
sleep `wait_time`s
for a in etcd1 etcd2 etcd3; do
goreman -f $PROCFILE run restart $a || echo "could not restart $a"
done
}
# rand_fp prints one randomly chosen line of $FAILPOINTS. It relies on the
# global $NUMFPS being the number of lines in $FAILPOINTS; sed "Nq;d" prints
# only line N.
function rand_fp {
echo "$FAILPOINTS" | sed `expr $RANDOM % $NUMFPS + 1`"q;d"
}
# fp_activate <http> <fppath> <value>
# PUTs <value> to <http>/<fppath> with curl; all curl output is discarded.
function fp_activate {
curl "$1"/"$2" -XPUT -d "$3" >/dev/null 2>&1
}
# fp_rand_single activates one random failpoint, with a panic(<name>) value,
# on one randomly chosen endpoint from HTTPFAIL, then sleeps 1-10 seconds.
function fp_rand_single {
fp=`rand_fp`
fp_activate ${HTTPFAIL[`expr $RANDOM % ${#HTTPFAIL[@]}`]} $fp 'panic("'$fp'")'
sleep `wait_time`s
}
# fp_rand_all activates one random failpoint, with a panic(<name>) value, on
# every endpoint in HTTPFAIL, then sleeps a random 1-10 seconds.
function fp_rand_all {
	fp=`rand_fp`
	# Iterate over the array elements themselves. Indexing with `seq N`
	# (1..N) against a 0-based array skipped the first endpoint and used an
	# empty address for the last iteration.
	for url in "${HTTPFAIL[@]}"; do
		fp_activate "$url" "$fp" 'panic("'$fp'")'
	done
	sleep `wait_time`s
}
# fp_all_rand_fire sets every failpoint in $FAILPOINTS, on every endpoint in
# HTTPFAIL, to the value 0.5%panic("0.5%<name>").
# NOTE(review): presumably the 0.5% prefix makes the failpoint fire with a
# 0.5% probability per evaluation - confirm against the failpoint syntax.
function fp_all_rand_fire {
for fp in $FAILPOINTS; do
for url in "${HTTPFAIL[@]}"; do
fp_activate "$url" "$fp" '0.5%panic("0.5%'$fp'")'
done
done
}
# choose picks a random entry of the global FAULTS array, prints its name and
# runs it, reporting (but tolerating) a non-zero exit status.
function choose {
fault=${FAULTS[`expr $RANDOM % ${#FAULTS[@]}`]}
echo $fault
$fault || echo "failed: $fault"
}
# Main loop: build the list of available faults, then inject one random fault
# after another forever.
# NOTE(review): the initial sleep presumably gives the goreman-managed
# processes time to come up - confirm.
sleep 2s
FAULTS=(cycle_members kill_maj kill_all cycle_pbridge cycle_cbridge cycle_stresser)
# add failpoint faults if available
FAILPOINTS=`curl http://"${HTTPFAIL[0]}" 2>/dev/null | cut -f1 -d'=' | grep -v "^$"`
# Count the non-empty lines. `echo "$FAILPOINTS" | wc -l` reports 1 even when
# $FAILPOINTS is empty (echo still emits a newline), which enabled the
# failpoint faults when no failpoints were found.
NUMFPS=`printf '%s\n' "$FAILPOINTS" | grep -c .`
if [ "$NUMFPS" != "0" ]; then
	FAULTS+=(fp_rand_single)
	FAULTS+=(fp_rand_all)
fi
while [ 1 ]; do
	choose
	# start any nodes that have been killed by failpoints
	for a in etcd1 etcd2 etcd3; do goreman -f $PROCFILE run start $a; done
	fp_all_rand_fire
done