diff options
author | Alex Iacobucci <alexiacobucci@google.com> | 2024-02-02 19:45:12 +0000 |
---|---|---|
committer | Alex Iacobucci <alexiacobucci@google.com> | 2024-02-27 00:32:07 +0000 |
commit | 41cf4ac863e5188393d9a9f0fc16c829e1602d13 (patch) | |
tree | e3bc8df1129a1c04e1373cd40b1941928f02b683 | |
parent | e537f38acba686723e0e611782726a9958b349b6 (diff) | |
download | aoc-41cf4ac863e5188393d9a9f0fc16c829e1602d13.tar.gz |
aoc: improve handling of SSR requests
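Prevents concurrent execution of SSR by routing reset requests through a new trigger_aoc_ssr() helper, which rejects a request when AoC is already in the SSR state.
Uses mutex_trylock() and disable_irq_nosync() to avoid b/322826918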
Bug: 304888713
Test: triggered many SSRs
Signed-off-by: Alex Iacobucci <alexiacobucci@google.com>
Change-Id: I501c20ce79a9aaea52f1308218db46e34d8eb462
(cherry picked from commit 4079d32cb9ef977e77eb1d92f6789f5cacdc2dbd)
-rw-r--r-- | aoc.c | 64 | ||||
-rw-r--r-- | aoc.h | 2 | ||||
-rw-r--r-- | aoc_v1.c | 16 |
3 files changed, 50 insertions, 32 deletions
@@ -952,10 +952,7 @@ static ssize_t reset_store(struct device *dev, struct device_attribute *attr, if (prvdata->no_ap_resets) { dev_err(dev, "Reset request rejected, option disabled via persist options"); } else { - configure_crash_interrupts(prvdata, false); - strlcpy(prvdata->ap_reset_reason, reason_str, AP_RESET_REASON_LENGTH); - prvdata->ap_triggered_reset = true; - schedule_work(&prvdata->watchdog_work); + trigger_aoc_ssr(true, reason_str); } return count; } @@ -972,13 +969,7 @@ static ssize_t force_reload_store(struct device *dev, struct device_attribute *a while (work_busy(&prvdata->watchdog_work) || work_busy(&prvdata->monitor_work.work)); prvdata->force_release_aoc = false; - /* Disable IRQ if AoC is loaded for paired IRQ */ - if (aoc_state != AOC_STATE_OFFLINE) - disable_irq_nosync(prvdata->watchdog_irq); - - strlcpy(prvdata->ap_reset_reason, "Force Reload AoC", AP_RESET_REASON_LENGTH); - prvdata->ap_triggered_reset = true; - schedule_work(&prvdata->watchdog_work); + trigger_aoc_ssr(true, "Force Reload AoC"); return count; } @@ -1317,10 +1308,7 @@ static void aoc_monitor_online(struct work_struct *work) /* TODO: figure out if this still causes APC watchdogs on GS201 */ return; - disable_irq_nosync(prvdata->watchdog_irq); - strlcpy(prvdata->ap_reset_reason, "Monitor Reset", AP_RESET_REASON_LENGTH); - prvdata->ap_triggered_reset = true; - schedule_work(&prvdata->watchdog_work); + trigger_aoc_ssr(true, "AOC detected not online"); } } @@ -1674,6 +1662,28 @@ void aoc_remove_map_handler(struct aoc_service_dev *dev) } EXPORT_SYMBOL_GPL(aoc_remove_map_handler); +void trigger_aoc_ssr(bool ap_triggered_reset, char *reset_reason) { + struct aoc_prvdata *prvdata = platform_get_drvdata(aoc_platform_device); + if (!mutex_trylock(&aoc_service_lock)) { + dev_info(prvdata->dev, "AOC SSR: could not acquire mutex\n"); + return; + } else { + bool aoc_in_ssr = aoc_state == AOC_STATE_SSR; + mutex_unlock(&aoc_service_lock); + if (aoc_in_ssr) { + dev_err(prvdata->dev, "Reset 
request rejected, AOC already in SSR\n"); + } else { + configure_crash_interrupts(prvdata, false); + if (ap_triggered_reset) { + strlcpy(prvdata->ap_reset_reason, reset_reason, + AP_RESET_REASON_LENGTH); + prvdata->ap_triggered_reset = true; + } + schedule_work(&prvdata->watchdog_work); + } + } +} + static void aoc_watchdog(struct work_struct *work) { struct aoc_prvdata *prvdata = @@ -1697,16 +1707,28 @@ static void aoc_watchdog(struct work_struct *work) int sscd_rc; char crash_info[RAMDUMP_SECTION_CRASH_INFO_SIZE]; int restart_rc; - bool ap_reset = false, valid_magic; + bool ap_triggered_reset, valid_magic; struct aoc_section_header *crash_info_section; - aoc_state = AOC_STATE_SSR; + /* If we're already in SSR state, do nothing. */ + mutex_lock(&aoc_service_lock); + if (aoc_state == AOC_STATE_SSR) { + mutex_unlock(&aoc_service_lock); + return; + } else { + aoc_state = AOC_STATE_SSR; + mutex_unlock(&aoc_service_lock); + } + prvdata->total_restarts++; + ap_triggered_reset = prvdata->ap_triggered_reset; + prvdata->ap_triggered_reset = false; + /* Initialize crash_info[0] to identify if it has changed later in the function. 
*/ crash_info[0] = 0; - if (prvdata->ap_triggered_reset) { + if (ap_triggered_reset) { if ((ktime_get_real_ns() - prvdata->last_reset_time_ns) / 1000000 <= prvdata->reset_hysteresis_trigger_ms) { /* If the watchdog was triggered recently, busy wait to @@ -1738,11 +1760,9 @@ static void aoc_watchdog(struct work_struct *work) goto err_coredump; } - if (prvdata->ap_triggered_reset) { + if (ap_triggered_reset) { dev_info(prvdata->dev, "AP triggered reset, reason: [%s]", prvdata->ap_reset_reason); - prvdata->ap_triggered_reset = false; - ap_reset = true; trigger_aoc_ramdump(prvdata); } @@ -1820,7 +1840,7 @@ static void aoc_watchdog(struct work_struct *work) sscd_info.segs[0].addr = prvdata->dram_virt; } - if (ap_reset) { + if (ap_triggered_reset) { /* Prefer the user specified reason */ scnprintf(crash_info, sizeof(crash_info), "AP Reset: %s", prvdata->ap_reset_reason); } @@ -291,6 +291,8 @@ void configure_crash_interrupts(struct aoc_prvdata *prvdata, bool enable); void notify_timeout_aoc_status(void); +void trigger_aoc_ssr(bool ap_triggered_reset, char* reset_reason); + #define AOC_SERVICE_NAME_LENGTH 32 /* Rings should have the ring flag set, slots = 1, size = ring size @@ -437,14 +437,7 @@ EXPORT_SYMBOL_GPL(aoc_unlocked_ioctl_handle_ion_fd); static irqreturn_t watchdog_int_handler(int irq, void *dev) { - struct aoc_prvdata *prvdata = dev_get_drvdata(dev); - - /* AP shouldn't access AoC registers to clear the IRQ. */ - /* Mask the IRQ until the IRQ gets cleared by AoC reset during SSR. 
*/ - disable_irq_nosync(irq); - aoc_state = AOC_STATE_SSR; - schedule_work(&prvdata->watchdog_work); - + trigger_aoc_ssr(false, NULL); return IRQ_HANDLED; } @@ -540,8 +533,11 @@ void configure_crash_interrupts(struct aoc_prvdata *prvdata, bool enable) enable_irq(prvdata->sysmmu_secure_irq); enable_irq(prvdata->watchdog_irq); } else { - disable_irq(prvdata->sysmmu_nonsecure_irq); - disable_irq(prvdata->sysmmu_secure_irq); + disable_irq_nosync(prvdata->sysmmu_nonsecure_irq); + disable_irq_nosync(prvdata->sysmmu_secure_irq); + /* Need to disable it to let APM handle it once we + * retrigger it in aoc_watchdog_restart. + */ disable_irq_nosync(prvdata->watchdog_irq); } } |