-
Notifications
You must be signed in to change notification settings - Fork 5.9k
Trainer library discover master by etcd #2551
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 5 commits
bff8d8d
d80bd05
475f77d
c1cfcdc
3c88fd2
cfeac09
ff3324c
83e7e45
77cbc55
f691bf6
e6492ff
ca6a114
f5ed683
0792515
d4c06ed
3b02067
0d8a0ed
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,4 @@ | ||
| CMakeCache.txt | ||
| cmake_install.cmake | ||
| CMakeFiles | ||
| go | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -54,6 +54,13 @@ func (a addresser) Address() string { | |
| return string(a) | ||
| } | ||
|
|
||
| //export paddle_new_etcd_master_client | ||
| func paddle_new_etcd_master_client(etcdEndpoints *C.char, bufSize int) C.paddle_master_client { | ||
| p := C.GoString(etcdEndpoints) | ||
| c := master.NewEtcdClient(addresser(p), bufSize) | ||
|
||
| return add(c) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 感觉把
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done. |
||
| } | ||
|
|
||
| //export paddle_new_master_client | ||
| func paddle_new_master_client(addr *C.char, bufSize int) C.paddle_master_client { | ||
| a := C.GoString(addr) | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,14 +1,19 @@ | ||
| package master | ||
|
|
||
| import ( | ||
| "context" | ||
| "os" | ||
| "strings" | ||
| "time" | ||
|
|
||
| "github.com/PaddlePaddle/Paddle/go/connection" | ||
| "github.com/PaddlePaddle/recordio" | ||
| "github.com/coreos/etcd/clientv3" | ||
| log "github.com/sirupsen/logrus" | ||
| ) | ||
|
|
||
| const masterAddrPath = "/master" | ||
|
|
||
| // Addresser provide the address of the master server. | ||
| type Addresser interface { | ||
| Address() string | ||
|
|
@@ -20,6 +25,51 @@ type Client struct { | |
| ch chan []byte | ||
| } | ||
|
|
||
| // MasterAddresser provide master address | ||
|
||
| type masterAddresser struct { | ||
| client *clientv3.Client | ||
| timeout time.Duration | ||
| endpoints []string | ||
| } | ||
|
|
||
| // Address return the address | ||
| func (m masterAddresser) Address() string { | ||
| for { | ||
| ctx, cancel := context.WithTimeout(context.Background(), m.timeout) | ||
| resp, err := m.client.Get(ctx, masterAddrPath) | ||
|
||
| cancel() | ||
| if err != nil { | ||
| log.Errorf("Fetch master addr failed, reconnecting to etcd, %v", err) | ||
| err := m.client.Close() | ||
| if err != nil { | ||
| log.Errorln(err) | ||
| time.Sleep(m.timeout) | ||
| continue | ||
| } | ||
| // reconnect to etcd server | ||
|
||
| m.client, err = clientv3.New(clientv3.Config{ | ||
| Endpoints: m.endpoints, | ||
| DialTimeout: m.timeout, | ||
| }) | ||
| if err != nil { | ||
| log.Errorf("Reconnecting etcd failed, sleep for %d seconds ...\n%v", m.timeout, err) | ||
| time.Sleep(m.timeout) | ||
| continue | ||
| } | ||
| continue | ||
| } | ||
| kvs := resp.Kvs | ||
| if len(kvs) == 0 { | ||
| log.Infoln("Waiting for master be ready ...") | ||
| time.Sleep(m.timeout) | ||
| continue | ||
| } | ||
| mAddr := kvs[0].Value | ||
| log.Debugf("Fetched master address: %s\n", mAddr) | ||
| return string(mAddr) | ||
| } | ||
| } | ||
|
|
||
| // NewClient creates a new Client. | ||
| // | ||
| // bufSize is the record buffer size. NextRecord will read from this | ||
|
|
@@ -33,6 +83,32 @@ func NewClient(addr Addresser, bufSize int) *Client { | |
| return c | ||
| } | ||
|
|
||
| // NewEtcdClient create a new master client by etcd | ||
| // | ||
| // endpoints is the endpoints for etcd and separated by ",", such as | ||
| // "172.0.1.0:2379,172.0.1.1:2379" | ||
| // timeout is the timeout for etcd calls | ||
| // bufSize is the record buffer size. NextRecord will read from this buffer. | ||
| func NewEtcdClient(endpoints string, timeout int, bufSize int) *Client { | ||
| t := time.Second * time.Duration(timeout) | ||
| ep := strings.Split(endpoints, ",") | ||
| cli, err := clientv3.New(clientv3.Config{ | ||
| Endpoints: ep, | ||
| DialTimeout: t, | ||
| }) | ||
| if err != nil { | ||
| log.Errorf("Init etcd connection failed: %v", err) | ||
| panic(err) | ||
| } | ||
| log.Debugf("Connected to etcd: %s\n", endpoints) | ||
| mAddresser := masterAddresser{ | ||
| client: cli, | ||
| timeout: t, | ||
| endpoints: ep, | ||
| } | ||
| return NewClient(mAddresser, bufSize) | ||
| } | ||
|
|
||
| func (c *Client) getRecords() { | ||
| for { | ||
| t, err := c.getTask() | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
do we still need this files? Since the build result is now moved to build dir
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks, done.