aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorjdesprez <jdesprez@google.com>2017-06-08 14:55:24 -0700
committerjdesprez <jdesprez@google.com>2017-06-08 14:58:44 -0700
commitd4ab7699c42a21bc8fe63d4cda15ede80041df8e (patch)
tree589d11714bf92767eba47bdf8af5752175053621
parent392acd565089427e021d117bf15b5c4e86bbc1bf (diff)
downloadtradefederation-d4ab7699c42a21bc8fe63d4cda15ede80041df8e.tar.gz
Log more interesting events and allow strong poller
- Log some device events to history_log in order to see more clearly when device went down and up again. - Allow poller to recover from DNAE if device come back online in a reasonable time. Test: run cts-suite --shard-count 5 unit tests (cts, tf) run cts-suite -m CtsSecurityHostTestCases Bug: 37211399 Change-Id: I96d8ce3b6ddb32812a14860954cf0fbfb9ed4dcb
-rw-r--r--src/com/android/tradefed/device/DeviceManager.java1
-rw-r--r--src/com/android/tradefed/invoker/shard/ShardHelper.java7
-rw-r--r--src/com/android/tradefed/invoker/shard/TestsPoolPoller.java151
-rw-r--r--src/com/android/tradefed/log/ILogRegistry.java3
-rw-r--r--src/com/android/tradefed/testtype/suite/ModuleDefinition.java32
-rw-r--r--tests/src/com/android/tradefed/invoker/shard/TestsPoolPollerTest.java66
6 files changed, 199 insertions, 61 deletions
diff --git a/src/com/android/tradefed/device/DeviceManager.java b/src/com/android/tradefed/device/DeviceManager.java
index 9a75239f7..da914808e 100644
--- a/src/com/android/tradefed/device/DeviceManager.java
+++ b/src/com/android/tradefed/device/DeviceManager.java
@@ -1014,6 +1014,7 @@ public class DeviceManager implements IDeviceManager {
} else if (DeviceState.OFFLINE.equals(idevice.getState()) ||
DeviceState.UNAUTHORIZED.equals(idevice.getState())) {
mManagedDeviceList.handleDeviceEvent(testDevice, DeviceEvent.CONNECTED_OFFLINE);
+ logDeviceEvent(EventType.DEVICE_CONNECTED_OFFLINE, testDevice.getSerialNumber());
}
mFirstDeviceAdded.countDown();
}
diff --git a/src/com/android/tradefed/invoker/shard/ShardHelper.java b/src/com/android/tradefed/invoker/shard/ShardHelper.java
index bc3b44879..d5997182e 100644
--- a/src/com/android/tradefed/invoker/shard/ShardHelper.java
+++ b/src/com/android/tradefed/invoker/shard/ShardHelper.java
@@ -36,6 +36,7 @@ import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
+import java.util.concurrent.CountDownLatch;
/** Helper class that handles creating the shards and scheduling them for an invocation. */
public class ShardHelper implements IShardHelper {
@@ -88,17 +89,19 @@ public class ShardHelper implements IShardHelper {
// contiguously in the list.
Collections.shuffle(shardableTests);
int maxShard = Math.min(shardCount, shardableTests.size());
+ CountDownLatch tracker = new CountDownLatch(maxShard);
for (int i = 0; i < maxShard; i++) {
IConfiguration shardConfig = config.clone();
- shardConfig.setTest(new TestsPoolPoller(shardableTests));
+ shardConfig.setTest(new TestsPoolPoller(shardableTests, tracker));
rescheduleConfig(shardConfig, config, context, rescheduler, resultCollector);
}
} else {
+ CountDownLatch tracker = new CountDownLatch(shardableTests.size());
for (IRemoteTest testShard : shardableTests) {
CLog.i("Rescheduling sharded config...");
IConfiguration shardConfig = config.clone();
if (config.getCommandOptions().shouldUseDynamicSharding()) {
- shardConfig.setTest(new TestsPoolPoller(shardableTests));
+ shardConfig.setTest(new TestsPoolPoller(shardableTests, tracker));
} else {
shardConfig.setTest(testShard);
}
diff --git a/src/com/android/tradefed/invoker/shard/TestsPoolPoller.java b/src/com/android/tradefed/invoker/shard/TestsPoolPoller.java
index 1d02e2c39..7ffce6639 100644
--- a/src/com/android/tradefed/invoker/shard/TestsPoolPoller.java
+++ b/src/com/android/tradefed/invoker/shard/TestsPoolPoller.java
@@ -15,6 +15,7 @@
*/
package com.android.tradefed.invoker.shard;
+import com.android.ddmlib.Log.LogLevel;
import com.android.tradefed.build.IBuildInfo;
import com.android.tradefed.config.IConfiguration;
import com.android.tradefed.config.IConfigurationReceiver;
@@ -22,6 +23,8 @@ import com.android.tradefed.device.DeviceNotAvailableException;
import com.android.tradefed.device.DeviceUnresponsiveException;
import com.android.tradefed.device.ITestDevice;
import com.android.tradefed.invoker.IInvocationContext;
+import com.android.tradefed.log.ILogRegistry.EventType;
+import com.android.tradefed.log.LogRegistry;
import com.android.tradefed.log.LogUtil.CLog;
import com.android.tradefed.result.ITestInvocationListener;
import com.android.tradefed.suite.checker.ISystemStatusChecker;
@@ -32,10 +35,13 @@ import com.android.tradefed.testtype.IInvocationContextReceiver;
import com.android.tradefed.testtype.IMultiDeviceTest;
import com.android.tradefed.testtype.IRemoteTest;
import com.android.tradefed.testtype.ITestCollector;
+import com.android.tradefed.util.StreamUtil;
import java.util.Collection;
+import java.util.HashMap;
import java.util.List;
import java.util.Map;
+import java.util.concurrent.CountDownLatch;
/**
* Tests wrapper that allow to execute all the tests of a pool of tests. Tests can be shared by
@@ -53,7 +59,10 @@ public class TestsPoolPoller
ISystemStatusCheckerReceiver,
ITestCollector {
+ private static final long WAIT_RECOVERY_TIME = 5 * 60 * 1000;
+
private Collection<IRemoteTest> mGenericPool;
+ private CountDownLatch mTracker;
private ITestDevice mDevice;
private IBuildInfo mBuildInfo;
@@ -67,9 +76,11 @@ public class TestsPoolPoller
* Ctor where the pool of {@link IRemoteTest} is provided.
*
* @param tests {@link IRemoteTest}s pool of all tests.
+ * @param tracker a {@link CountDownLatch} shared to get the number of running poller.
*/
- public TestsPoolPoller(Collection<IRemoteTest> tests) {
+ public TestsPoolPoller(Collection<IRemoteTest> tests, CountDownLatch tracker) {
mGenericPool = tests;
+ mTracker = tracker;
}
/** Returns the first {@link IRemoteTest} from the pool or null if none remaining. */
@@ -87,60 +98,96 @@ public class TestsPoolPoller
/** {@inheritDoc} */
@Override
public void run(ITestInvocationListener listener) throws DeviceNotAvailableException {
- while (true) {
- IRemoteTest test = poll();
- if (test == null) {
- return;
- }
- if (test instanceof IBuildReceiver) {
- ((IBuildReceiver) test).setBuild(mBuildInfo);
- }
- if (test instanceof IConfigurationReceiver) {
- ((IConfigurationReceiver) test).setConfiguration(mConfig);
- }
- if (test instanceof IDeviceTest) {
- ((IDeviceTest) test).setDevice(mDevice);
- }
- if (test instanceof IInvocationContextReceiver) {
- ((IInvocationContextReceiver) test).setInvocationContext(mContext);
- }
- if (test instanceof IMultiDeviceTest) {
- ((IMultiDeviceTest) test).setDeviceInfos(mDeviceInfos);
- }
- if (test instanceof ISystemStatusCheckerReceiver) {
- ((ISystemStatusCheckerReceiver) test).setSystemStatusChecker(mSystemStatusCheckers);
- }
- if (test instanceof ITestCollector) {
- ((ITestCollector) test).setCollectTestsOnly(mShouldCollectTest);
+ try {
+ while (true) {
+ IRemoteTest test = poll();
+ if (test == null) {
+ return;
+ }
+ if (test instanceof IBuildReceiver) {
+ ((IBuildReceiver) test).setBuild(mBuildInfo);
+ }
+ if (test instanceof IConfigurationReceiver) {
+ ((IConfigurationReceiver) test).setConfiguration(mConfig);
+ }
+ if (test instanceof IDeviceTest) {
+ ((IDeviceTest) test).setDevice(mDevice);
+ }
+ if (test instanceof IInvocationContextReceiver) {
+ ((IInvocationContextReceiver) test).setInvocationContext(mContext);
+ }
+ if (test instanceof IMultiDeviceTest) {
+ ((IMultiDeviceTest) test).setDeviceInfos(mDeviceInfos);
+ }
+ if (test instanceof ISystemStatusCheckerReceiver) {
+ ((ISystemStatusCheckerReceiver) test)
+ .setSystemStatusChecker(mSystemStatusCheckers);
+ }
+ if (test instanceof ITestCollector) {
+ ((ITestCollector) test).setCollectTestsOnly(mShouldCollectTest);
+ }
+ // Run the test itself and prevent random exception from stopping the poller.
+ try {
+ test.run(listener);
+ } catch (RuntimeException e) {
+ CLog.e(
+ "Caught an Exception in a test: %s. Proceeding to next test.",
+ test.getClass());
+ CLog.e(e);
+ } catch (DeviceUnresponsiveException due) {
+ // being able to catch a DeviceUnresponsiveException here implies that recovery was
+ // successful, and test execution should proceed to next test.
+ CLog.w(
+ "Ignored DeviceUnresponsiveException because recovery was "
+ + "successful, proceeding with next test. Stack trace:");
+ CLog.w(due);
+ CLog.w("Proceeding to the next test.");
+ } catch (DeviceNotAvailableException dnae) {
+ HandleDeviceNotAvailable(dnae, test);
+ }
}
- // Run the test itself and prevent random exception from stopping the poller.
- try {
- test.run(listener);
- } catch (RuntimeException e) {
- CLog.e(
- "Caught an Exception in a test: %s. Proceeding to next test.",
- test.getClass());
- CLog.e(e);
- } catch (DeviceUnresponsiveException due) {
- // being able to catch a DeviceUnresponsiveException here implies that recovery was
- // successful, and test execution should proceed to next test.
- CLog.w(
- "Ignored DeviceUnresponsiveException because recovery was "
- + "successful, proceeding with next test. Stack trace:");
- CLog.w(due);
- CLog.w("Proceeding to the next test.");
- } catch (DeviceNotAvailableException dnae) {
- // We catch and rethrow in order to log that the poller associated with the device
- // that went offline is terminating.
- CLog.e(
- "Test %s threw DeviceNotAvailableException. Test poller associated with "
- + "device %s is terminating.",
- test.getClass(), mDevice.getSerialNumber());
- // TODO: Add a fail-safe mechanism in case all pollers terminate and we still have
- // tests in the pool.
- throw dnae;
+ } finally {
+ mTracker.countDown();
+ }
+ }
+
+ /**
+ * Helper to wait for the device to maybe come back online, in that case we reboot it to refresh
+ * the state and proceed with execution.
+ */
+ void HandleDeviceNotAvailable(DeviceNotAvailableException originalException, IRemoteTest test)
+ throws DeviceNotAvailableException {
+ try {
+ if (mTracker.getCount() > 1) {
+ CLog.d("Wait 5 min for device to maybe coming back online.");
+ mDevice.waitForDeviceAvailable(WAIT_RECOVERY_TIME);
+ mDevice.reboot();
+ CLog.d("TestPoller was recovered after %s went offline", mDevice.getSerialNumber());
+ return;
}
+ // We catch and rethrow in order to log that the poller associated with the device
+ // that went offline is terminating.
+ CLog.e(
+ "Test %s threw DeviceNotAvailableException. Test poller associated with "
+ + "device %s is terminating.",
+ test.getClass(), mDevice.getSerialNumber());
+ // Log an event to track more easily the failure
+ logDeviceEvent(
+ EventType.SHARD_POLLER_EARLY_TERMINATION,
+ mDevice.getSerialNumber(),
+ originalException);
+ } catch (DeviceNotAvailableException e) {
+ // ignore this exception
}
+ throw originalException;
+ }
+
+ /** Helper to log the device events. */
+ private void logDeviceEvent(EventType event, String serial, Throwable t) {
+ Map<String, String> args = new HashMap<>();
+ args.put("serial", serial);
+ args.put("trace", StreamUtil.getStackTrace(t));
+ LogRegistry.getLogRegistry().logEvent(LogLevel.DEBUG, event, args);
}
@Override
diff --git a/src/com/android/tradefed/log/ILogRegistry.java b/src/com/android/tradefed/log/ILogRegistry.java
index e541a121a..c08e0820e 100644
--- a/src/com/android/tradefed/log/ILogRegistry.java
+++ b/src/com/android/tradefed/log/ILogRegistry.java
@@ -31,10 +31,13 @@ public interface ILogRegistry extends ILogOutput {
/** Events that are useful to be logged */
public enum EventType {
DEVICE_CONNECTED,
+ DEVICE_CONNECTED_OFFLINE,
DEVICE_DISCONNECTED,
INVOCATION_START,
INVOCATION_END,
HEAP_MEMORY,
+ SHARD_POLLER_EARLY_TERMINATION,
+ MODULE_DEVICE_NOT_AVAILABLE,
}
/**
diff --git a/src/com/android/tradefed/testtype/suite/ModuleDefinition.java b/src/com/android/tradefed/testtype/suite/ModuleDefinition.java
index a1d42898e..ef193b789 100644
--- a/src/com/android/tradefed/testtype/suite/ModuleDefinition.java
+++ b/src/com/android/tradefed/testtype/suite/ModuleDefinition.java
@@ -15,6 +15,7 @@
*/
package com.android.tradefed.testtype.suite;
+import com.android.ddmlib.Log.LogLevel;
import com.android.ddmlib.testrunner.TestIdentifier;
import com.android.ddmlib.testrunner.TestResult;
import com.android.ddmlib.testrunner.TestRunResult;
@@ -23,7 +24,9 @@ import com.android.tradefed.device.DeviceNotAvailableException;
import com.android.tradefed.device.DeviceUnresponsiveException;
import com.android.tradefed.device.ITestDevice;
import com.android.tradefed.invoker.IInvocationContext;
+import com.android.tradefed.log.ILogRegistry.EventType;
import com.android.tradefed.log.ITestLogger;
+import com.android.tradefed.log.LogRegistry;
import com.android.tradefed.log.LogUtil.CLog;
import com.android.tradefed.result.ITestInvocationListener;
import com.android.tradefed.result.ITestLoggerReceiver;
@@ -37,6 +40,7 @@ import com.android.tradefed.testtype.IBuildReceiver;
import com.android.tradefed.testtype.IDeviceTest;
import com.android.tradefed.testtype.IRemoteTest;
import com.android.tradefed.testtype.ITestCollector;
+import com.android.tradefed.util.StreamUtil;
import com.google.common.annotations.VisibleForTesting;
@@ -249,6 +253,20 @@ public class ModuleDefinition implements Comparable<ModuleDefinition>, ITestColl
+ "successful, proceeding with next module. Stack trace:");
CLog.w(due);
CLog.w("Proceeding to the next test.");
+ } catch (DeviceNotAvailableException dnae) {
+ // We do special logging of some information in Context of the module for easier
+ // debugging.
+ CLog.e(
+ "Module %s threw a DeviceNotAvailableException on device %s during test %s",
+ getId(), mDevice.getSerialNumber(), test.getClass());
+ CLog.e(dnae);
+ // log an events
+ logDeviceEvent(
+ EventType.MODULE_DEVICE_NOT_AVAILABLE,
+ mDevice.getSerialNumber(),
+ dnae,
+ getId());
+ throw dnae;
} finally {
mTestsResults.addAll(moduleListener.getRunResults());
mExpectedTests += moduleListener.getNumTotalTests();
@@ -262,6 +280,11 @@ public class ModuleDefinition implements Comparable<ModuleDefinition>, ITestColl
CLog.d("Cleaner: %s", cleaner.getClass().getSimpleName());
cleaner.tearDown(mDevice, mBuild, null);
}
+ } catch (DeviceNotAvailableException tearDownException) {
+ CLog.e(
+ "Module %s failed during tearDown with: %s",
+ getId(), StreamUtil.getStackTrace(tearDownException));
+ throw tearDownException;
} finally {
mElapsedTearDown = getCurrentTime() - cleanStartTime;
// finalize results
@@ -272,6 +295,15 @@ public class ModuleDefinition implements Comparable<ModuleDefinition>, ITestColl
}
}
+ /** Helper to log the device events. */
+ private void logDeviceEvent(EventType event, String serial, Throwable t, String moduleId) {
+ Map<String, String> args = new HashMap<>();
+ args.put("serial", serial);
+ args.put("trace", StreamUtil.getStackTrace(t));
+ args.put("module-id", moduleId);
+ LogRegistry.getLogRegistry().logEvent(LogLevel.DEBUG, event, args);
+ }
+
/** Finalize results to report them all and count if there are missing tests. */
private void reportFinalResults(
ITestInvocationListener listener,
diff --git a/tests/src/com/android/tradefed/invoker/shard/TestsPoolPollerTest.java b/tests/src/com/android/tradefed/invoker/shard/TestsPoolPollerTest.java
index 39891c307..ced28b9cd 100644
--- a/tests/src/com/android/tradefed/invoker/shard/TestsPoolPollerTest.java
+++ b/tests/src/com/android/tradefed/invoker/shard/TestsPoolPollerTest.java
@@ -15,7 +15,11 @@
*/
package com.android.tradefed.invoker.shard;
-import static org.junit.Assert.*;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
import com.android.tradefed.config.OptionSetter;
import com.android.tradefed.device.DeviceNotAvailableException;
@@ -33,6 +37,7 @@ import org.mockito.Mockito;
import java.util.ArrayList;
import java.util.List;
+import java.util.concurrent.CountDownLatch;
/** Unit tests for {@link TestsPoolPoller}. */
@RunWith(JUnit4.class)
@@ -59,8 +64,9 @@ public class TestsPoolPollerTest {
for (int i = 0; i < numTests; i++) {
testsList.add(new StubTest());
}
- TestsPoolPoller poller1 = new TestsPoolPoller(testsList);
- TestsPoolPoller poller2 = new TestsPoolPoller(testsList);
+ CountDownLatch tracker = new CountDownLatch(2);
+ TestsPoolPoller poller1 = new TestsPoolPoller(testsList, tracker);
+ TestsPoolPoller poller2 = new TestsPoolPoller(testsList, tracker);
// initial size
assertEquals(numTests, testsList.size());
assertNotNull(poller1.poll());
@@ -90,12 +96,14 @@ public class TestsPoolPollerTest {
setter.setOptionValue("run-a-test", "true");
testsList.add(test);
}
- TestsPoolPoller poller = new TestsPoolPoller(testsList);
+ CountDownLatch tracker = new CountDownLatch(1);
+ TestsPoolPoller poller = new TestsPoolPoller(testsList, tracker);
poller.run(mListener);
Mockito.verify(mListener, Mockito.times(numTests))
.testRunStarted(Mockito.anyString(), Mockito.anyInt());
Mockito.verify(mListener, Mockito.times(numTests))
.testRunEnded(Mockito.anyLong(), Mockito.any());
+ assertEquals(0, tracker.getCount());
}
/**
@@ -118,12 +126,14 @@ public class TestsPoolPollerTest {
s.setOptionValue("run-a-test", "true");
testsList.add(test);
}
- TestsPoolPoller poller = new TestsPoolPoller(testsList);
+ CountDownLatch tracker = new CountDownLatch(1);
+ TestsPoolPoller poller = new TestsPoolPoller(testsList, tracker);
poller.run(mListener);
Mockito.verify(mListener, Mockito.times(numTests))
.testRunStarted(Mockito.anyString(), Mockito.anyInt());
Mockito.verify(mListener, Mockito.times(numTests))
.testRunEnded(Mockito.anyLong(), Mockito.any());
+ assertEquals(0, tracker.getCount());
}
/**
@@ -146,12 +156,14 @@ public class TestsPoolPollerTest {
s.setOptionValue("run-a-test", "true");
testsList.add(test);
}
- TestsPoolPoller poller = new TestsPoolPoller(testsList);
+ CountDownLatch tracker = new CountDownLatch(1);
+ TestsPoolPoller poller = new TestsPoolPoller(testsList, tracker);
poller.run(mListener);
Mockito.verify(mListener, Mockito.times(numTests))
.testRunStarted(Mockito.anyString(), Mockito.anyInt());
Mockito.verify(mListener, Mockito.times(numTests))
.testRunEnded(Mockito.anyLong(), Mockito.any());
+ assertEquals(0, tracker.getCount());
}
/**
@@ -174,7 +186,8 @@ public class TestsPoolPollerTest {
s.setOptionValue("run-a-test", "true");
testsList.add(test);
}
- TestsPoolPoller poller = new TestsPoolPoller(testsList);
+ CountDownLatch tracker = new CountDownLatch(1);
+ TestsPoolPoller poller = new TestsPoolPoller(testsList, tracker);
poller.setDevice(mDevice);
try {
poller.run(mListener);
@@ -186,5 +199,44 @@ public class TestsPoolPollerTest {
Mockito.verify(mListener, Mockito.times(0))
.testRunStarted(Mockito.anyString(), Mockito.anyInt());
Mockito.verify(mListener, Mockito.times(0)).testRunEnded(Mockito.anyLong(), Mockito.any());
+ assertEquals(0, tracker.getCount());
+ }
+
+ /**
+ * If a device not available exception is thrown from a tests, and the poller is not the last
+ * one alive, we wait and attempt to recover the device. In case of success, execution will
+ * proceed.
+ */
+ @Test
+ public void testRun_dnae_NotLastDevice() throws Exception {
+ List<IRemoteTest> testsList = new ArrayList<>();
+ // Add one bad test first that will throw an exception.
+ IRemoteTest badTest = new StubTest();
+ OptionSetter setter = new OptionSetter(badTest);
+ setter.setOptionValue("test-throw-not-available", "true");
+ testsList.add(badTest);
+ // Add tests that can run
+ int numTests = 5;
+ for (int i = 0; i < numTests; i++) {
+ IRemoteTest test = new StubTest();
+ OptionSetter s = new OptionSetter(test);
+ s.setOptionValue("run-a-test", "true");
+ testsList.add(test);
+ }
+ CountDownLatch tracker = new CountDownLatch(3);
+ TestsPoolPoller poller = new TestsPoolPoller(testsList, tracker);
+ poller.setDevice(mDevice);
+
+ poller.run(mListener);
+ // The callbacks from all the other tests because the device was recovered
+ Mockito.verify(mListener, Mockito.times(numTests))
+ .testRunStarted(Mockito.anyString(), Mockito.anyInt());
+ Mockito.verify(mListener, Mockito.times(numTests)).testStarted(Mockito.any());
+ Mockito.verify(mListener, Mockito.times(numTests)).testEnded(Mockito.any(), Mockito.any());
+ Mockito.verify(mListener, Mockito.times(numTests))
+ .testRunEnded(Mockito.anyLong(), Mockito.any());
+ Mockito.verify(mDevice).waitForDeviceAvailable(Mockito.anyLong());
+ Mockito.verify(mDevice).reboot();
+ assertEquals(2, tracker.getCount());
}
}