mirror of
https://github.com/cryptomator/cryptomator.git
synced 2026-05-14 08:41:28 +00:00
(now really) fixed deadlock when crypto workers die due to exceptions
This commit is contained in:
@@ -1,6 +1,10 @@
|
||||
package org.cryptomator.crypto.aes256;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.concurrent.BlockingQueue;
|
||||
import java.util.concurrent.Callable;
|
||||
import java.util.concurrent.CancellationException;
|
||||
import java.util.concurrent.CompletionService;
|
||||
import java.util.concurrent.ExecutionException;
|
||||
import java.util.concurrent.ExecutorCompletionService;
|
||||
@@ -27,7 +31,7 @@ class CryptoWorkerExecutor {
|
||||
private final AtomicLong currentBlock;
|
||||
private final BlockingQueue<BlocksData> inputQueue;
|
||||
private final ExecutorService executorService;
|
||||
private final CompletionService<Void> completionService;
|
||||
private final Future<Void> allWork;
|
||||
private volatile boolean acceptWork;
|
||||
|
||||
/**
|
||||
@@ -40,14 +44,17 @@ class CryptoWorkerExecutor {
|
||||
this.currentBlock = new AtomicLong();
|
||||
this.inputQueue = new LinkedBlockingQueue<>(numWorkers * 2); // one cycle read-ahead
|
||||
this.executorService = Executors.newFixedThreadPool(numWorkers);
|
||||
this.completionService = new ExecutorCompletionService<>(executorService);
|
||||
this.acceptWork = true;
|
||||
|
||||
// start workers:
|
||||
final CompletionService<Void> completionService = new ExecutorCompletionService<>(executorService);
|
||||
final Collection<Future<?>> workers = new ArrayList<>(numWorkers);
|
||||
for (int i = 0; i < numWorkers; i++) {
|
||||
final CryptoWorker worker = workerFactory.createWorker(lock, blockDone, currentBlock, inputQueue);
|
||||
completionService.submit(worker);
|
||||
workers.add(completionService.submit(worker));
|
||||
}
|
||||
final Supervisor supervisor = new Supervisor(workers, completionService);
|
||||
this.allWork = executorService.submit(supervisor);
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -79,22 +86,20 @@ class CryptoWorkerExecutor {
|
||||
/**
|
||||
* Graceful shutdown of this executor, waiting for all jobs to finish (normally or by throwing exceptions).
|
||||
*
|
||||
* @param timeout Maximum time spent <em>per worker</em> to wait for a graceful shutdown (technically worst case is: <code>2 * numWorkers * time</code>)
|
||||
* @param timeout Maximum time spent <em>per worker</em> to wait for a graceful shutdown
|
||||
* @param unit Timeout unit
|
||||
* @throws ExecutionException If any of the workers failed.
|
||||
*/
|
||||
public void waitUntilDone(long timeout, TimeUnit unit) throws ExecutionException {
|
||||
this.acceptWork = false;
|
||||
try {
|
||||
// fail fast, if workers are done before being poisoned (i.e. exceptionally):
|
||||
for (Future<Void> task = completionService.poll(); task != null; task = completionService.poll()) {
|
||||
task.get(); // this will most likely throw an ExecutionException
|
||||
}
|
||||
// if we got to this point without any exception, all workers are still running, so lets poison them:
|
||||
poisonWorkers(timeout, unit);
|
||||
// now workers will one after another finish their work, potentially throwing an ExecutionException:
|
||||
for (Future<Void> task = completionService.poll(timeout, unit); task != null; task = completionService.poll(timeout, unit)) {
|
||||
task.get();
|
||||
if (allWork.isDone()) {
|
||||
// Work is done before workers being poisoned? This will most likely throw an ExecutionException:
|
||||
allWork.get();
|
||||
} else {
|
||||
// Work not done yet, enqueue poison pill and wait for workers to finish:
|
||||
poisonWorkers(timeout, unit);
|
||||
allWork.get();
|
||||
}
|
||||
} catch (InterruptedException e) {
|
||||
LOG.error("Interrupted thread.", e);
|
||||
@@ -119,4 +124,43 @@ class CryptoWorkerExecutor {
|
||||
CryptoWorker createWorker(Lock lock, Condition blockDone, AtomicLong currentBlock, BlockingQueue<BlocksData> inputQueue);
|
||||
}
|
||||
|
||||
/**
|
||||
* A supervisor watches the work results of a collection of workers. The supervisor waits for all workers to finish.
|
||||
* The supvervisor itself does not cause any exceptions, but if <em>one</em> worker fails, all other workers are cancelled immediately and the exception propagates through this supvervisor.
|
||||
* Anyone waiting for the supervisor to finish will thus effectively wait for all supvervisees to finish.
|
||||
*/
|
||||
private static class Supervisor implements Callable<Void> {
|
||||
|
||||
private final Collection<Future<?>> workers;
|
||||
private final CompletionService<?> completionService;
|
||||
|
||||
public Supervisor(Collection<Future<?>> workers, CompletionService<?> completionService) {
|
||||
this.workers = workers;
|
||||
this.completionService = completionService;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Void call() throws ExecutionException {
|
||||
try {
|
||||
for (int i = 0; i < workers.size(); i++) {
|
||||
try {
|
||||
// any ExecutionException thrown here will propagate up (after work is canceled in finally block)
|
||||
completionService.take().get();
|
||||
} catch (CancellationException ignore) {
|
||||
}
|
||||
}
|
||||
} catch (InterruptedException e) {
|
||||
// supervisor may be interrupted when executorservice is shut down.
|
||||
Thread.currentThread().interrupt();
|
||||
} finally {
|
||||
// make sure, that at the end of the day all remaining workers leave the building.
|
||||
for (Future<?> worker : workers) {
|
||||
worker.cancel(true);
|
||||
}
|
||||
}
|
||||
// no exception up to this point -> all workers finished work normally.
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user