summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAlex Iacobucci <alexiacobucci@google.com>2024-02-02 19:45:12 +0000
committerAlex Iacobucci <alexiacobucci@google.com>2024-02-27 00:32:07 +0000
commit41cf4ac863e5188393d9a9f0fc16c829e1602d13 (patch)
treee3bc8df1129a1c04e1373cd40b1941928f02b683
parente537f38acba686723e0e611782726a9958b349b6 (diff)
downloadaoc-41cf4ac863e5188393d9a9f0fc16c829e1602d13.tar.gz
aoc: improve handling of SSR requests
Prevents concurrent execution of SSR. Uses trylock and no_sync to avoid b/322826918 Bug: 304888713 Test: triggered many SSRs Signed-off-by: Alex Iacobucci <alexiacobucci@google.com> Change-Id: I501c20ce79a9aaea52f1308218db46e34d8eb462 (cherry picked from commit 4079d32cb9ef977e77eb1d92f6789f5cacdc2dbd)
-rw-r--r--aoc.c64
-rw-r--r--aoc.h2
-rw-r--r--aoc_v1.c16
3 files changed, 50 insertions, 32 deletions
diff --git a/aoc.c b/aoc.c
index bb5da79..dbf45c2 100644
--- a/aoc.c
+++ b/aoc.c
@@ -952,10 +952,7 @@ static ssize_t reset_store(struct device *dev, struct device_attribute *attr,
if (prvdata->no_ap_resets) {
dev_err(dev, "Reset request rejected, option disabled via persist options");
} else {
- configure_crash_interrupts(prvdata, false);
- strlcpy(prvdata->ap_reset_reason, reason_str, AP_RESET_REASON_LENGTH);
- prvdata->ap_triggered_reset = true;
- schedule_work(&prvdata->watchdog_work);
+ trigger_aoc_ssr(true, reason_str);
}
return count;
}
@@ -972,13 +969,7 @@ static ssize_t force_reload_store(struct device *dev, struct device_attribute *a
while (work_busy(&prvdata->watchdog_work) || work_busy(&prvdata->monitor_work.work));
prvdata->force_release_aoc = false;
- /* Disable IRQ if AoC is loaded for paired IRQ */
- if (aoc_state != AOC_STATE_OFFLINE)
- disable_irq_nosync(prvdata->watchdog_irq);
-
- strlcpy(prvdata->ap_reset_reason, "Force Reload AoC", AP_RESET_REASON_LENGTH);
- prvdata->ap_triggered_reset = true;
- schedule_work(&prvdata->watchdog_work);
+ trigger_aoc_ssr(true, "Force Reload AoC");
return count;
}
@@ -1317,10 +1308,7 @@ static void aoc_monitor_online(struct work_struct *work)
/* TODO: figure out if this still causes APC watchdogs on GS201 */
return;
- disable_irq_nosync(prvdata->watchdog_irq);
- strlcpy(prvdata->ap_reset_reason, "Monitor Reset", AP_RESET_REASON_LENGTH);
- prvdata->ap_triggered_reset = true;
- schedule_work(&prvdata->watchdog_work);
+ trigger_aoc_ssr(true, "AOC detected not online");
}
}
@@ -1674,6 +1662,28 @@ void aoc_remove_map_handler(struct aoc_service_dev *dev)
}
EXPORT_SYMBOL_GPL(aoc_remove_map_handler);
+void trigger_aoc_ssr(bool ap_triggered_reset, char *reset_reason) {
+ struct aoc_prvdata *prvdata = platform_get_drvdata(aoc_platform_device);
+ if (!mutex_trylock(&aoc_service_lock)) {
+ dev_info(prvdata->dev, "AOC SSR: could not acquire mutex\n");
+ return;
+ } else {
+ bool aoc_in_ssr = aoc_state == AOC_STATE_SSR;
+ mutex_unlock(&aoc_service_lock);
+ if (aoc_in_ssr) {
+ dev_err(prvdata->dev, "Reset request rejected, AOC already in SSR\n");
+ } else {
+ configure_crash_interrupts(prvdata, false);
+ if (ap_triggered_reset) {
+ strlcpy(prvdata->ap_reset_reason, reset_reason,
+ AP_RESET_REASON_LENGTH);
+ prvdata->ap_triggered_reset = true;
+ }
+ schedule_work(&prvdata->watchdog_work);
+ }
+ }
+}
+
static void aoc_watchdog(struct work_struct *work)
{
struct aoc_prvdata *prvdata =
@@ -1697,16 +1707,28 @@ static void aoc_watchdog(struct work_struct *work)
int sscd_rc;
char crash_info[RAMDUMP_SECTION_CRASH_INFO_SIZE];
int restart_rc;
- bool ap_reset = false, valid_magic;
+ bool ap_triggered_reset, valid_magic;
struct aoc_section_header *crash_info_section;
- aoc_state = AOC_STATE_SSR;
+ /* If we're already in SSR state, do nothing. */
+ mutex_lock(&aoc_service_lock);
+ if (aoc_state == AOC_STATE_SSR) {
+ mutex_unlock(&aoc_service_lock);
+ return;
+ } else {
+ aoc_state = AOC_STATE_SSR;
+ mutex_unlock(&aoc_service_lock);
+ }
+
prvdata->total_restarts++;
+ ap_triggered_reset = prvdata->ap_triggered_reset;
+ prvdata->ap_triggered_reset = false;
+
/* Initialize crash_info[0] to identify if it has changed later in the function. */
crash_info[0] = 0;
- if (prvdata->ap_triggered_reset) {
+ if (ap_triggered_reset) {
if ((ktime_get_real_ns() - prvdata->last_reset_time_ns) / 1000000
<= prvdata->reset_hysteresis_trigger_ms) {
/* If the watchdog was triggered recently, busy wait to
@@ -1738,11 +1760,9 @@ static void aoc_watchdog(struct work_struct *work)
goto err_coredump;
}
- if (prvdata->ap_triggered_reset) {
+ if (ap_triggered_reset) {
dev_info(prvdata->dev, "AP triggered reset, reason: [%s]",
prvdata->ap_reset_reason);
- prvdata->ap_triggered_reset = false;
- ap_reset = true;
trigger_aoc_ramdump(prvdata);
}
@@ -1820,7 +1840,7 @@ static void aoc_watchdog(struct work_struct *work)
sscd_info.segs[0].addr = prvdata->dram_virt;
}
- if (ap_reset) {
+ if (ap_triggered_reset) {
/* Prefer the user specified reason */
scnprintf(crash_info, sizeof(crash_info), "AP Reset: %s", prvdata->ap_reset_reason);
}
diff --git a/aoc.h b/aoc.h
index df4f9d6..4659601 100644
--- a/aoc.h
+++ b/aoc.h
@@ -291,6 +291,8 @@ void configure_crash_interrupts(struct aoc_prvdata *prvdata, bool enable);
void notify_timeout_aoc_status(void);
+void trigger_aoc_ssr(bool ap_triggered_reset, char* reset_reason);
+
#define AOC_SERVICE_NAME_LENGTH 32
/* Rings should have the ring flag set, slots = 1, size = ring size
diff --git a/aoc_v1.c b/aoc_v1.c
index 58572d2..bf3f18d 100644
--- a/aoc_v1.c
+++ b/aoc_v1.c
@@ -437,14 +437,7 @@ EXPORT_SYMBOL_GPL(aoc_unlocked_ioctl_handle_ion_fd);
static irqreturn_t watchdog_int_handler(int irq, void *dev)
{
- struct aoc_prvdata *prvdata = dev_get_drvdata(dev);
-
- /* AP shouldn't access AoC registers to clear the IRQ. */
- /* Mask the IRQ until the IRQ gets cleared by AoC reset during SSR. */
- disable_irq_nosync(irq);
- aoc_state = AOC_STATE_SSR;
- schedule_work(&prvdata->watchdog_work);
-
+ trigger_aoc_ssr(false, NULL);
return IRQ_HANDLED;
}
@@ -540,8 +533,11 @@ void configure_crash_interrupts(struct aoc_prvdata *prvdata, bool enable)
enable_irq(prvdata->sysmmu_secure_irq);
enable_irq(prvdata->watchdog_irq);
} else {
- disable_irq(prvdata->sysmmu_nonsecure_irq);
- disable_irq(prvdata->sysmmu_secure_irq);
+ disable_irq_nosync(prvdata->sysmmu_nonsecure_irq);
+ disable_irq_nosync(prvdata->sysmmu_secure_irq);
+ /* Need to disable it to let APM handle it once we
+ * retrigger it in aoc_watchdog_restart.
+ */
disable_irq_nosync(prvdata->watchdog_irq);
}
}