-
Notifications
You must be signed in to change notification settings - Fork 92
Eagerly fetch data in hot-storage #807
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
79fb7bb
fe8dd8a
1eb7a77
a2f60e2
427d347
30eefec
680f182
1c35588
90a20cf
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -130,6 +130,7 @@ func configFromFlags() (server.Config, error) { | |
| ffsSchedMaxParallel := config.GetInt("ffsschedmaxparallel") | ||
| ffsDealWatchFinalityTimeout := time.Minute * time.Duration(config.GetInt("ffsdealfinalitytimeout")) | ||
| ffsMinimumPieceSize := config.GetUint64("ffsminimumpiecesize") | ||
| ffsRetrievalNextEventTimeout := config.GetDuration("ffsretrievalnexteventtimeout") | ||
| ffsMaxParallelDealPreparing := config.GetInt("ffsmaxparalleldealpreparing") | ||
| ffsGCInterval := time.Minute * time.Duration(config.GetInt("ffsgcinterval")) | ||
| ffsGCStagedGracePeriod := time.Minute * time.Duration(config.GetInt("ffsgcstagedgraceperiod")) | ||
|
|
@@ -166,18 +167,19 @@ func configFromFlags() (server.Config, error) { | |
| MongoURI: mongoURI, | ||
| MongoDB: mongoDB, | ||
|
|
||
| FFSAdminToken: ffsAdminToken, | ||
| FFSUseMasterAddr: ffsUseMasterAddr, | ||
| FFSDealFinalityTimeout: ffsDealWatchFinalityTimeout, | ||
| FFSMinimumPieceSize: ffsMinimumPieceSize, | ||
| FFSMaxParallelDealPreparing: ffsMaxParallelDealPreparing, | ||
| FFSGCAutomaticGCInterval: ffsGCInterval, | ||
| FFSGCStageGracePeriod: ffsGCStagedGracePeriod, | ||
| AutocreateMasterAddr: autocreateMasterAddr, | ||
| MinerSelector: minerSelector, | ||
| MinerSelectorParams: minerSelectorParams, | ||
| SchedMaxParallel: ffsSchedMaxParallel, | ||
| DealWatchPollDuration: dealWatchPollDuration, | ||
| FFSAdminToken: ffsAdminToken, | ||
| FFSUseMasterAddr: ffsUseMasterAddr, | ||
| FFSDealFinalityTimeout: ffsDealWatchFinalityTimeout, | ||
| FFSMinimumPieceSize: ffsMinimumPieceSize, | ||
| FFSRetrievalNextEventTimeout: ffsRetrievalNextEventTimeout, | ||
| FFSMaxParallelDealPreparing: ffsMaxParallelDealPreparing, | ||
| FFSGCAutomaticGCInterval: ffsGCInterval, | ||
| FFSGCStageGracePeriod: ffsGCStagedGracePeriod, | ||
| AutocreateMasterAddr: autocreateMasterAddr, | ||
| MinerSelector: minerSelector, | ||
| MinerSelectorParams: minerSelectorParams, | ||
| SchedMaxParallel: ffsSchedMaxParallel, | ||
| DealWatchPollDuration: dealWatchPollDuration, | ||
|
|
||
| AskIndexQueryAskTimeout: askIndexQueryAskTimeout, | ||
| AskIndexRefreshInterval: askIndexRefreshInterval, | ||
|
|
@@ -387,6 +389,7 @@ func setupFlags() error { | |
| pflag.String("ffsminerselector", "reputation", "Miner selector to be used by FFS: 'sr2', 'reputation'.") | ||
| pflag.String("ffsminerselectorparams", "", "Miner selector configuration parameter, depends on --ffsminerselector.") | ||
| pflag.String("ffsminimumpiecesize", "67108864", "Minimum piece size in bytes allowed to be stored in Filecoin.") | ||
| pflag.Duration("ffsretrievalnexteventtimeout", time.Hour, "Maximum amount of time to wait for the next retrieval event before erroring it.") | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Would be great in the future to change all duration flags to `pflag.Duration`.
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yea, I've been using that lately, quite nice. |
||
| pflag.String("ffsschedmaxparallel", "1000", "Maximum amount of Jobs executed in parallel.") | ||
| pflag.String("ffsdealfinalitytimeout", "4320", "Deadline in minutes in which a deal must prove liveness changing status before considered abandoned.") | ||
| pflag.String("ffsmaxparalleldealpreparing", "2", "Max parallel deal preparing tasks.") | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -411,7 +411,7 @@ func (m *Module) recordRetrieval(addr string, offer api.QueryOffer, bytesReceive | |
| RootCid: offer.Root, | ||
| Size: offer.Size, | ||
| MinPrice: offer.MinPrice.Uint64(), | ||
| Miner: offer.Miner.String(), | ||
| Miner: offer.MinerPeer.Address.String(), | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Solves a situation in which miners claim that the retrieval will be done from a worker address, and not their owner address. This might confuse the user since the deal was made with miner X, and they might see that the retrieval is being made from miner Y. (And miner Y hasn't stored data in the network and is mostly empty). |
||
| MinerPeerID: offer.MinerPeer.ID.String(), | ||
| PaymentInterval: offer.PaymentInterval, | ||
| PaymentIntervalIncrease: offer.PaymentIntervalIncrease, | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -123,7 +123,7 @@ func (m *Module) retrieve(ctx context.Context, lapi *apistruct.FullNodeStruct, l | |
| break Loop | ||
| } | ||
| if e.Err != "" { | ||
| log.Infof("in progress retrieval errored: %s", err) | ||
| log.Infof("in progress retrieval errored: %s", e.Err) | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Wrong error variable. |
||
| errMsg = e.Err | ||
| } | ||
| if dtStart.IsZero() && e.Event == retrievalmarket.ClientEventBlocksReceived { | ||
|
|
@@ -146,23 +146,23 @@ func (m *Module) retrieve(ctx context.Context, lapi *apistruct.FullNodeStruct, l | |
| // payment channel creation. This isn't ideal, but | ||
| // it's better than missing the data. | ||
| // We WARN just to signal this might be happening. | ||
| if dtStart.IsZero() { | ||
| if dtStart.IsZero() && errMsg == "" { | ||
| dtStart = retrievalStartTime | ||
| log.Warnf("retrieval data-transfer start fallback to retrieval start") | ||
| } | ||
| // This is a fallback to not receiving an expected | ||
| // event in the retrieval. We just fallback to Now(), | ||
| // which should always be pretty close to the real | ||
| // event. We WARN just to signal this is happening. | ||
| if dtEnd.IsZero() { | ||
| if dtEnd.IsZero() && errMsg == "" { | ||
| dtEnd = time.Now() | ||
| log.Warnf("retrieval data-transfer end fallback to retrieval end") | ||
| } | ||
| m.recordRetrieval(waddr, o, bytesReceived, dtStart, dtEnd, errMsg) | ||
| } | ||
| }() | ||
|
|
||
| return o.Miner.String(), out, nil | ||
| return o.MinerPeer.Address.String(), out, nil | ||
| } | ||
|
|
||
| func getRetrievalOffers(ctx context.Context, lapi *apistruct.FullNodeStruct, payloadCid cid.Cid, pieceCid *cid.Cid, miners []string) []api.QueryOffer { | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -57,13 +57,12 @@ func New(ds datastore.TxnDatastore, ipfs iface.CoreAPI, l ffs.JobLogger) (*CoreI | |
|
|
||
| // Stage adds the data of io.Reader in the storage, and creates a stage-pin on the resulting cid. | ||
| func (ci *CoreIpfs) Stage(ctx context.Context, iid ffs.APIID, r io.Reader) (cid.Cid, error) { | ||
| ci.lock.Lock() | ||
| defer ci.lock.Unlock() | ||
|
|
||
| p, err := ci.ipfs.Unixfs().Add(ctx, ipfsfiles.NewReaderFile(r), options.Unixfs.Pin(true)) | ||
| if err != nil { | ||
| return cid.Undef, fmt.Errorf("adding data to ipfs: %s", err) | ||
| } | ||
| ci.lock.Lock() | ||
| defer ci.lock.Unlock() | ||
|
|
||
| if err := ci.ps.AddStaged(iid, p.Cid()); err != nil { | ||
| return cid.Undef, fmt.Errorf("saving new pin in pinstore: %s", err) | ||
|
|
@@ -72,8 +71,11 @@ func (ci *CoreIpfs) Stage(ctx context.Context, iid ffs.APIID, r io.Reader) (cid. | |
| return p.Cid(), nil | ||
| } | ||
|
|
||
| // StageCid stage-pin a Cid. | ||
| // StageCid pull the Cid data and stage-pin it. | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. We're changing the meaning of `StageCid`: it now pulls the Cid data before stage-pinning it. |
||
| func (ci *CoreIpfs) StageCid(ctx context.Context, iid ffs.APIID, c cid.Cid) error { | ||
| if err := ci.ipfs.Pin().Add(ctx, path.IpfsPath(c), options.Pin.Recursive(true)); err != nil { | ||
| return fmt.Errorf("adding data to ipfs: %s", err) | ||
| } | ||
| ci.lock.Lock() | ||
| defer ci.lock.Unlock() | ||
|
|
||
|
|
@@ -263,7 +265,7 @@ Loop: | |
|
|
||
| // Skip Cids that are excluded. | ||
| if _, ok := excludeMap[stagedPin.Cid]; ok { | ||
| log.Infof("skipping staged cid %s since it's in exclusion list", stagedPin) | ||
| log.Infof("skipping staged cid %s since it's in exclusion list", stagedPin.Cid) | ||
| continue Loop | ||
| } | ||
| // A Cid is only safe to GC if all existing stage-pin are older than | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -12,6 +12,7 @@ import ( | |
| "github.com/filecoin-project/go-fil-markets/storagemarket" | ||
| "github.com/filecoin-project/go-state-types/abi" | ||
| "github.com/filecoin-project/lotus/api" | ||
| marketevents "github.com/filecoin-project/lotus/markets/loggers" | ||
| "github.com/ipfs/go-cid" | ||
| logger "github.com/ipfs/go-log/v2" | ||
| iface "github.com/ipfs/interface-go-ipfs-core" | ||
|
|
@@ -35,15 +36,16 @@ var ( | |
| // FilCold is a ColdStorage implementation which saves data in the Filecoin network. | ||
| // It assumes the underlying Filecoin client has access to an IPFS node where data is stored. | ||
| type FilCold struct { | ||
| ms ffs.MinerSelector | ||
| dm *dealsModule.Module | ||
| wm wallet.Module | ||
| ipfs iface.CoreAPI | ||
| chain FilChain | ||
| l ffs.JobLogger | ||
| lsm *lotus.SyncMonitor | ||
| minPieceSize uint64 | ||
| semaphDealPrep chan struct{} | ||
| ms ffs.MinerSelector | ||
| dm *dealsModule.Module | ||
| wm wallet.Module | ||
| ipfs iface.CoreAPI | ||
| chain FilChain | ||
| l ffs.JobLogger | ||
| lsm *lotus.SyncMonitor | ||
| minPieceSize uint64 | ||
| retrNextEventTimeout time.Duration | ||
| semaphDealPrep chan struct{} | ||
| } | ||
|
|
||
| var _ ffs.ColdStorage = (*FilCold)(nil) | ||
|
|
@@ -54,17 +56,18 @@ type FilChain interface { | |
| } | ||
|
|
||
| // New returns a new FilCold instance. | ||
| func New(ms ffs.MinerSelector, dm *dealsModule.Module, wm wallet.Module, ipfs iface.CoreAPI, chain FilChain, l ffs.JobLogger, lsm *lotus.SyncMonitor, minPieceSize uint64, maxParallelDealPreparing int) *FilCold { | ||
| func New(ms ffs.MinerSelector, dm *dealsModule.Module, wm wallet.Module, ipfs iface.CoreAPI, chain FilChain, l ffs.JobLogger, lsm *lotus.SyncMonitor, minPieceSize uint64, maxParallelDealPreparing int, retrievalNextEventTimeout time.Duration) *FilCold { | ||
| return &FilCold{ | ||
| ms: ms, | ||
| dm: dm, | ||
| wm: wm, | ||
| ipfs: ipfs, | ||
| chain: chain, | ||
| l: l, | ||
| lsm: lsm, | ||
| minPieceSize: minPieceSize, | ||
| semaphDealPrep: make(chan struct{}, maxParallelDealPreparing), | ||
| ms: ms, | ||
| dm: dm, | ||
| wm: wm, | ||
| ipfs: ipfs, | ||
| chain: chain, | ||
| l: l, | ||
| lsm: lsm, | ||
| minPieceSize: minPieceSize, | ||
| retrNextEventTimeout: retrievalNextEventTimeout, | ||
| semaphDealPrep: make(chan struct{}, maxParallelDealPreparing), | ||
| } | ||
| } | ||
|
|
||
|
|
@@ -76,21 +79,39 @@ func (fc *FilCold) Fetch(ctx context.Context, pyCid cid.Cid, piCid *cid.Cid, wad | |
| return ffs.FetchInfo{}, fmt.Errorf("fetching from deal module: %s", err) | ||
| } | ||
| fc.l.Log(ctx, "Fetching from %s...", miner) | ||
| var fundsSpent uint64 | ||
| var lastMsg string | ||
| for e := range events { | ||
| if e.Err != "" { | ||
| return ffs.FetchInfo{}, fmt.Errorf("event error in retrieval progress: %s", e.Err) | ||
| } | ||
| strEvent := retrievalmarket.ClientEvents[e.Event] | ||
| strDealStatus := retrievalmarket.DealStatuses[e.Status] | ||
| fundsSpent = e.FundsSpent.Uint64() | ||
| newMsg := fmt.Sprintf("Received %s, total spent: %sFIL (%s/%s)", humanize.IBytes(e.BytesReceived), util.AttoFilToFil(fundsSpent), strEvent, strDealStatus) | ||
| if newMsg != lastMsg { | ||
| fc.l.Log(ctx, newMsg) | ||
| lastMsg = newMsg | ||
|
|
||
| var ( | ||
| fundsSpent uint64 | ||
| lastMsg string | ||
| lastEvent marketevents.RetrievalEvent | ||
| ) | ||
| Loop: | ||
| for { | ||
| select { | ||
| case <-time.After(fc.retrNextEventTimeout): | ||
| return ffs.FetchInfo{}, fmt.Errorf("didn't receive events for %d minutes", int64(fc.retrNextEventTimeout.Minutes())) | ||
| case e, ok := <-events: | ||
| if !ok { | ||
| break Loop | ||
| } | ||
| if e.Err != "" { | ||
| return ffs.FetchInfo{}, fmt.Errorf("event error in retrieval progress: %s", e.Err) | ||
| } | ||
| strEvent := retrievalmarket.ClientEvents[e.Event] | ||
| strDealStatus := retrievalmarket.DealStatuses[e.Status] | ||
| fundsSpent = e.FundsSpent.Uint64() | ||
| newMsg := fmt.Sprintf("Received %s, total spent: %sFIL (%s/%s)", humanize.IBytes(e.BytesReceived), util.AttoFilToFil(fundsSpent), strEvent, strDealStatus) | ||
| if newMsg != lastMsg { | ||
| fc.l.Log(ctx, newMsg) | ||
| lastMsg = newMsg | ||
| } | ||
| lastEvent = e | ||
| } | ||
| } | ||
| if lastEvent.Status != retrievalmarket.DealStatusCompleted { | ||
| return ffs.FetchInfo{}, fmt.Errorf("retrieval failed with status %s and message %s", retrievalmarket.DealStatuses[lastEvent.Status], lastMsg) | ||
| } | ||
|
|
||
|
Comment on lines
+89
to
+114
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. TL;DR: the retrieval event loop now enforces a timeout between events and verifies that the last event's status is `DealStatusCompleted` before considering the retrieval successful.
|
||
| return ffs.FetchInfo{RetrievedMiner: miner, FundsSpent: fundsSpent}, nil | ||
| } | ||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -33,7 +33,7 @@ var ( | |
| Hot: ffs.HotConfig{ | ||
| Enabled: false, | ||
| Ipfs: ffs.IpfsConfig{ | ||
| AddTimeout: 480, // 8 min | ||
| AddTimeout: 15 * 60, // 15min | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I believe this is a better default for big data. The user can change it anyway.
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Worth updating everywhere in the db using the massive change tool?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yep, I have that as a task in my pre-deployment steps. Using |
||
| }, | ||
| }, | ||
| Cold: ffs.ColdConfig{ | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
A new config attribute to set a timeout for a retrieval that might get stuck.
If we don't receive any data or event while doing the retrieval within this duration, we fail.
Unfortunately, there are situations/bugs in Lotus in which a retrieval might get stuck, so this is a safety net.