fix: reduce crawler memory usage by orders of magnitude (#11556)

currently the crawler waits for an entire readdir call to
return before it processes usage, lifecycle, replication
and healing - instead we should pass the applicator all
the way down to avoid building up an intermediate list of
all the contents of a single directory.

This allows for

- no need to remember the entire list of entries per directory
  before applying the required functions
- no need to wait for the entire readdir() call to finish before
  applying the required functions (see the sketch below)
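
A minimal sketch of the streaming approach, assuming a hypothetical readDirFn helper and callback signature; the names and batch size below are illustrative, not the actual MinIO implementation:

// readdir_sketch.go - stream directory entries to a caller-supplied fn
// instead of collecting the whole listing in memory first.
package main

import (
	"errors"
	"fmt"
	"io"
	"os"
)

// Sentinel errors mirroring the ones introduced by this commit.
var (
	errDoneForNow = errors.New("done for now")
	errSkipFile   = errors.New("skip this file")
)

// readDirFn reads dirPath in small batches and applies fn to each entry as
// it is read, so memory use stays bounded regardless of directory size.
func readDirFn(dirPath string, fn func(name string, isDir bool) error) error {
	f, err := os.Open(dirPath)
	if err != nil {
		return err
	}
	defer f.Close()

	for {
		entries, rerr := f.ReadDir(16) // at most 16 entries per batch
		for _, entry := range entries {
			ferr := fn(entry.Name(), entry.IsDir())
			if errors.Is(ferr, errSkipFile) {
				continue // fn asked to skip this entry
			}
			if errors.Is(ferr, errDoneForNow) {
				return nil // fn has seen enough, stop the scan early
			}
			if ferr != nil {
				return ferr
			}
		}
		if errors.Is(rerr, io.EOF) {
			return nil // end of directory
		}
		if rerr != nil {
			return rerr
		}
	}
}

func main() {
	// Apply a function to each entry of the current directory as it is read.
	err := readDirFn(".", func(name string, isDir bool) error {
		fmt.Println(name, isDir)
		return nil
	})
	if err != nil {
		fmt.Fprintln(os.Stderr, err)
	}
}

The key design point is that fn is applied per entry inside the readdir loop, so the only per-directory state is the current batch rather than the full listing.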
Harshavardhana
2021-02-17 15:34:42 -08:00
committed by GitHub
parent e07918abe3
commit 289e1d8b2a
8 changed files with 47 additions and 63 deletions


@@ -16,6 +16,8 @@
 package cmd
 
+import "errors"
+
 // errUnexpected - unexpected error, requires manual intervention.
 var errUnexpected = StorageErr("unexpected error, please report this issue at https://github.com/minio/minio/issues")
@@ -104,6 +106,13 @@ var errLessData = StorageErr("less data available than what was requested")
 // errMoreData = returned when more data was sent by the caller than what it was supposed to.
 var errMoreData = StorageErr("more data was sent than what was advertised")
+
+// indicates readDirFn to return without further applying the fn()
+var errDoneForNow = errors.New("done for now")
+
+// errSkipFile returned by the fn() for readDirFn() when it needs
+// to proceed to next entry.
+var errSkipFile = errors.New("skip this file")
 
 // StorageErr represents error generated by xlStorage call.
 type StorageErr string
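
Building on the sketch above, a hedged example of how a caller-supplied applicator might use these new sentinels; countSomeFiles and its limit parameter are illustrative and reuse readDirFn, errSkipFile and errDoneForNow from the earlier sketch:

// countSomeFiles counts regular files in a directory, skipping
// subdirectories, and stops the scan early once limit is reached.
func countSomeFiles(dirPath string, limit int) (count int, err error) {
	err = readDirFn(dirPath, func(name string, isDir bool) error {
		if isDir {
			return errSkipFile // not interested in subdirectories here
		}
		count++
		if count >= limit {
			return errDoneForNow // stop without reading the rest of the directory
		}
		return nil
	})
	return count, err
}

Returning errSkipFile moves the walk on to the next entry, while errDoneForNow ends the walk early without surfacing as a failure to the caller.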