From d4ab7699c42a21bc8fe63d4cda15ede80041df8e Mon Sep 17 00:00:00 2001 From: jdesprez Date: Thu, 8 Jun 2017 14:55:24 -0700 Subject: Log more interesting events and allow strong poller - Log some device events to history_log in order to see more clearly when device went down and up again. - Allow poller to recover from DNAE if device come back online in a reasonable time. Test: run cts-suite --shard-count 5 unit tests (cts, tf) run cts-suite -m CtsSecurityHostTestCases Bug: 37211399 Change-Id: I96d8ce3b6ddb32812a14860954cf0fbfb9ed4dcb --- src/com/android/tradefed/device/DeviceManager.java | 1 + .../tradefed/invoker/shard/ShardHelper.java | 7 +- .../tradefed/invoker/shard/TestsPoolPoller.java | 151 ++++++++++++++------- src/com/android/tradefed/log/ILogRegistry.java | 3 + .../tradefed/testtype/suite/ModuleDefinition.java | 32 +++++ 5 files changed, 140 insertions(+), 54 deletions(-) (limited to 'src/com/android') diff --git a/src/com/android/tradefed/device/DeviceManager.java b/src/com/android/tradefed/device/DeviceManager.java index 9a75239f7..da914808e 100644 --- a/src/com/android/tradefed/device/DeviceManager.java +++ b/src/com/android/tradefed/device/DeviceManager.java @@ -1014,6 +1014,7 @@ public class DeviceManager implements IDeviceManager { } else if (DeviceState.OFFLINE.equals(idevice.getState()) || DeviceState.UNAUTHORIZED.equals(idevice.getState())) { mManagedDeviceList.handleDeviceEvent(testDevice, DeviceEvent.CONNECTED_OFFLINE); + logDeviceEvent(EventType.DEVICE_CONNECTED_OFFLINE, testDevice.getSerialNumber()); } mFirstDeviceAdded.countDown(); } diff --git a/src/com/android/tradefed/invoker/shard/ShardHelper.java b/src/com/android/tradefed/invoker/shard/ShardHelper.java index bc3b44879..d5997182e 100644 --- a/src/com/android/tradefed/invoker/shard/ShardHelper.java +++ b/src/com/android/tradefed/invoker/shard/ShardHelper.java @@ -36,6 +36,7 @@ import java.util.ArrayList; import java.util.Collection; import java.util.Collections; import java.util.List; +import java.util.concurrent.CountDownLatch; /** Helper class that handles creating the shards and scheduling them for an invocation. */ public class ShardHelper implements IShardHelper { @@ -88,17 +89,19 @@ public class ShardHelper implements IShardHelper { // contiguously in the list. Collections.shuffle(shardableTests); int maxShard = Math.min(shardCount, shardableTests.size()); + CountDownLatch tracker = new CountDownLatch(maxShard); for (int i = 0; i < maxShard; i++) { IConfiguration shardConfig = config.clone(); - shardConfig.setTest(new TestsPoolPoller(shardableTests)); + shardConfig.setTest(new TestsPoolPoller(shardableTests, tracker)); rescheduleConfig(shardConfig, config, context, rescheduler, resultCollector); } } else { + CountDownLatch tracker = new CountDownLatch(shardableTests.size()); for (IRemoteTest testShard : shardableTests) { CLog.i("Rescheduling sharded config..."); IConfiguration shardConfig = config.clone(); if (config.getCommandOptions().shouldUseDynamicSharding()) { - shardConfig.setTest(new TestsPoolPoller(shardableTests)); + shardConfig.setTest(new TestsPoolPoller(shardableTests, tracker)); } else { shardConfig.setTest(testShard); } diff --git a/src/com/android/tradefed/invoker/shard/TestsPoolPoller.java b/src/com/android/tradefed/invoker/shard/TestsPoolPoller.java index 1d02e2c39..7ffce6639 100644 --- a/src/com/android/tradefed/invoker/shard/TestsPoolPoller.java +++ b/src/com/android/tradefed/invoker/shard/TestsPoolPoller.java @@ -15,6 +15,7 @@ */ package com.android.tradefed.invoker.shard; +import com.android.ddmlib.Log.LogLevel; import com.android.tradefed.build.IBuildInfo; import com.android.tradefed.config.IConfiguration; import com.android.tradefed.config.IConfigurationReceiver; @@ -22,6 +23,8 @@ import com.android.tradefed.device.DeviceNotAvailableException; import com.android.tradefed.device.DeviceUnresponsiveException; import com.android.tradefed.device.ITestDevice; import com.android.tradefed.invoker.IInvocationContext; +import com.android.tradefed.log.ILogRegistry.EventType; +import com.android.tradefed.log.LogRegistry; import com.android.tradefed.log.LogUtil.CLog; import com.android.tradefed.result.ITestInvocationListener; import com.android.tradefed.suite.checker.ISystemStatusChecker; @@ -32,10 +35,13 @@ import com.android.tradefed.testtype.IInvocationContextReceiver; import com.android.tradefed.testtype.IMultiDeviceTest; import com.android.tradefed.testtype.IRemoteTest; import com.android.tradefed.testtype.ITestCollector; +import com.android.tradefed.util.StreamUtil; import java.util.Collection; +import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.concurrent.CountDownLatch; /** * Tests wrapper that allow to execute all the tests of a pool of tests. Tests can be shared by @@ -53,7 +59,10 @@ public class TestsPoolPoller ISystemStatusCheckerReceiver, ITestCollector { + private static final long WAIT_RECOVERY_TIME = 5 * 60 * 1000; + private Collection mGenericPool; + private CountDownLatch mTracker; private ITestDevice mDevice; private IBuildInfo mBuildInfo; @@ -67,9 +76,11 @@ public class TestsPoolPoller * Ctor where the pool of {@link IRemoteTest} is provided. * * @param tests {@link IRemoteTest}s pool of all tests. + * @param tracker a {@link CountDownLatch} shared to get the number of running poller. */ - public TestsPoolPoller(Collection tests) { + public TestsPoolPoller(Collection tests, CountDownLatch tracker) { mGenericPool = tests; + mTracker = tracker; } /** Returns the first {@link IRemoteTest} from the pool or null if none remaining. */ @@ -87,60 +98,96 @@ public class TestsPoolPoller /** {@inheritDoc} */ @Override public void run(ITestInvocationListener listener) throws DeviceNotAvailableException { - while (true) { - IRemoteTest test = poll(); - if (test == null) { - return; - } - if (test instanceof IBuildReceiver) { - ((IBuildReceiver) test).setBuild(mBuildInfo); - } - if (test instanceof IConfigurationReceiver) { - ((IConfigurationReceiver) test).setConfiguration(mConfig); - } - if (test instanceof IDeviceTest) { - ((IDeviceTest) test).setDevice(mDevice); - } - if (test instanceof IInvocationContextReceiver) { - ((IInvocationContextReceiver) test).setInvocationContext(mContext); - } - if (test instanceof IMultiDeviceTest) { - ((IMultiDeviceTest) test).setDeviceInfos(mDeviceInfos); - } - if (test instanceof ISystemStatusCheckerReceiver) { - ((ISystemStatusCheckerReceiver) test).setSystemStatusChecker(mSystemStatusCheckers); - } - if (test instanceof ITestCollector) { - ((ITestCollector) test).setCollectTestsOnly(mShouldCollectTest); + try { + while (true) { + IRemoteTest test = poll(); + if (test == null) { + return; + } + if (test instanceof IBuildReceiver) { + ((IBuildReceiver) test).setBuild(mBuildInfo); + } + if (test instanceof IConfigurationReceiver) { + ((IConfigurationReceiver) test).setConfiguration(mConfig); + } + if (test instanceof IDeviceTest) { + ((IDeviceTest) test).setDevice(mDevice); + } + if (test instanceof IInvocationContextReceiver) { + ((IInvocationContextReceiver) test).setInvocationContext(mContext); + } + if (test instanceof IMultiDeviceTest) { + ((IMultiDeviceTest) test).setDeviceInfos(mDeviceInfos); + } + if (test instanceof ISystemStatusCheckerReceiver) { + ((ISystemStatusCheckerReceiver) test) + .setSystemStatusChecker(mSystemStatusCheckers); + } + if (test instanceof ITestCollector) { + ((ITestCollector) test).setCollectTestsOnly(mShouldCollectTest); + } + // Run the test itself and prevent random exception from stopping the poller. + try { + test.run(listener); + } catch (RuntimeException e) { + CLog.e( + "Caught an Exception in a test: %s. Proceeding to next test.", + test.getClass()); + CLog.e(e); + } catch (DeviceUnresponsiveException due) { + // being able to catch a DeviceUnresponsiveException here implies that recovery was + // successful, and test execution should proceed to next test. + CLog.w( + "Ignored DeviceUnresponsiveException because recovery was " + + "successful, proceeding with next test. Stack trace:"); + CLog.w(due); + CLog.w("Proceeding to the next test."); + } catch (DeviceNotAvailableException dnae) { + HandleDeviceNotAvailable(dnae, test); + } } - // Run the test itself and prevent random exception from stopping the poller. - try { - test.run(listener); - } catch (RuntimeException e) { - CLog.e( - "Caught an Exception in a test: %s. Proceeding to next test.", - test.getClass()); - CLog.e(e); - } catch (DeviceUnresponsiveException due) { - // being able to catch a DeviceUnresponsiveException here implies that recovery was - // successful, and test execution should proceed to next test. - CLog.w( - "Ignored DeviceUnresponsiveException because recovery was " - + "successful, proceeding with next test. Stack trace:"); - CLog.w(due); - CLog.w("Proceeding to the next test."); - } catch (DeviceNotAvailableException dnae) { - // We catch and rethrow in order to log that the poller associated with the device - // that went offline is terminating. - CLog.e( - "Test %s threw DeviceNotAvailableException. Test poller associated with " - + "device %s is terminating.", - test.getClass(), mDevice.getSerialNumber()); - // TODO: Add a fail-safe mechanism in case all pollers terminate and we still have - // tests in the pool. - throw dnae; + } finally { + mTracker.countDown(); + } + } + + /** + * Helper to wait for the device to maybe come back online, in that case we reboot it to refresh + * the state and proceed with execution. + */ + void HandleDeviceNotAvailable(DeviceNotAvailableException originalException, IRemoteTest test) + throws DeviceNotAvailableException { + try { + if (mTracker.getCount() > 1) { + CLog.d("Wait 5 min for device to maybe coming back online."); + mDevice.waitForDeviceAvailable(WAIT_RECOVERY_TIME); + mDevice.reboot(); + CLog.d("TestPoller was recovered after %s went offline", mDevice.getSerialNumber()); + return; } + // We catch and rethrow in order to log that the poller associated with the device + // that went offline is terminating. + CLog.e( + "Test %s threw DeviceNotAvailableException. Test poller associated with " + + "device %s is terminating.", + test.getClass(), mDevice.getSerialNumber()); + // Log an event to track more easily the failure + logDeviceEvent( + EventType.SHARD_POLLER_EARLY_TERMINATION, + mDevice.getSerialNumber(), + originalException); + } catch (DeviceNotAvailableException e) { + // ignore this exception } + throw originalException; + } + + /** Helper to log the device events. */ + private void logDeviceEvent(EventType event, String serial, Throwable t) { + Map args = new HashMap<>(); + args.put("serial", serial); + args.put("trace", StreamUtil.getStackTrace(t)); + LogRegistry.getLogRegistry().logEvent(LogLevel.DEBUG, event, args); } @Override diff --git a/src/com/android/tradefed/log/ILogRegistry.java b/src/com/android/tradefed/log/ILogRegistry.java index e541a121a..c08e0820e 100644 --- a/src/com/android/tradefed/log/ILogRegistry.java +++ b/src/com/android/tradefed/log/ILogRegistry.java @@ -31,10 +31,13 @@ public interface ILogRegistry extends ILogOutput { /** Events that are useful to be logged */ public enum EventType { DEVICE_CONNECTED, + DEVICE_CONNECTED_OFFLINE, DEVICE_DISCONNECTED, INVOCATION_START, INVOCATION_END, HEAP_MEMORY, + SHARD_POLLER_EARLY_TERMINATION, + MODULE_DEVICE_NOT_AVAILABLE, } /** diff --git a/src/com/android/tradefed/testtype/suite/ModuleDefinition.java b/src/com/android/tradefed/testtype/suite/ModuleDefinition.java index a1d42898e..ef193b789 100644 --- a/src/com/android/tradefed/testtype/suite/ModuleDefinition.java +++ b/src/com/android/tradefed/testtype/suite/ModuleDefinition.java @@ -15,6 +15,7 @@ */ package com.android.tradefed.testtype.suite; +import com.android.ddmlib.Log.LogLevel; import com.android.ddmlib.testrunner.TestIdentifier; import com.android.ddmlib.testrunner.TestResult; import com.android.ddmlib.testrunner.TestRunResult; @@ -23,7 +24,9 @@ import com.android.tradefed.device.DeviceNotAvailableException; import com.android.tradefed.device.DeviceUnresponsiveException; import com.android.tradefed.device.ITestDevice; import com.android.tradefed.invoker.IInvocationContext; +import com.android.tradefed.log.ILogRegistry.EventType; import com.android.tradefed.log.ITestLogger; +import com.android.tradefed.log.LogRegistry; import com.android.tradefed.log.LogUtil.CLog; import com.android.tradefed.result.ITestInvocationListener; import com.android.tradefed.result.ITestLoggerReceiver; @@ -37,6 +40,7 @@ import com.android.tradefed.testtype.IBuildReceiver; import com.android.tradefed.testtype.IDeviceTest; import com.android.tradefed.testtype.IRemoteTest; import com.android.tradefed.testtype.ITestCollector; +import com.android.tradefed.util.StreamUtil; import com.google.common.annotations.VisibleForTesting; @@ -249,6 +253,20 @@ public class ModuleDefinition implements Comparable, ITestColl + "successful, proceeding with next module. Stack trace:"); CLog.w(due); CLog.w("Proceeding to the next test."); + } catch (DeviceNotAvailableException dnae) { + // We do special logging of some information in Context of the module for easier + // debugging. + CLog.e( + "Module %s threw a DeviceNotAvailableException on device %s during test %s", + getId(), mDevice.getSerialNumber(), test.getClass()); + CLog.e(dnae); + // log an events + logDeviceEvent( + EventType.MODULE_DEVICE_NOT_AVAILABLE, + mDevice.getSerialNumber(), + dnae, + getId()); + throw dnae; } finally { mTestsResults.addAll(moduleListener.getRunResults()); mExpectedTests += moduleListener.getNumTotalTests(); @@ -262,6 +280,11 @@ public class ModuleDefinition implements Comparable, ITestColl CLog.d("Cleaner: %s", cleaner.getClass().getSimpleName()); cleaner.tearDown(mDevice, mBuild, null); } + } catch (DeviceNotAvailableException tearDownException) { + CLog.e( + "Module %s failed during tearDown with: %s", + getId(), StreamUtil.getStackTrace(tearDownException)); + throw tearDownException; } finally { mElapsedTearDown = getCurrentTime() - cleanStartTime; // finalize results @@ -272,6 +295,15 @@ public class ModuleDefinition implements Comparable, ITestColl } } + /** Helper to log the device events. */ + private void logDeviceEvent(EventType event, String serial, Throwable t, String moduleId) { + Map args = new HashMap<>(); + args.put("serial", serial); + args.put("trace", StreamUtil.getStackTrace(t)); + args.put("module-id", moduleId); + LogRegistry.getLogRegistry().logEvent(LogLevel.DEBUG, event, args); + } + /** Finalize results to report them all and count if there are missing tests. */ private void reportFinalResults( ITestInvocationListener listener, -- cgit v1.2.3