From bf327e7a4e7a54b00eea3e5d2637eb50bb8fc9fe Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Tue, 4 Jan 2011 19:38:20 -0800 Subject: refresh the patches to 2.6.37 final --- TODO | 2 +- ...racing-Make-timer-tracing-actually-useful.patch | 64 ----- ...dd-basic-tracepoints-to-track-workqueue-e.patch | 129 --------- patches/linux-2.6.37-ahci-alpm-accounting.patch | 302 +++++++++++++++++++++ .../linux-2.6.37-rc1-ahci-alpm-accounting.patch | 302 --------------------- patches/linux-2.6.37-rc3-vfs-dirty-inode.patch | 105 ------- patches/linux-2.6.37-vfs-dirty-inode.patch | 102 +++++++ 7 files changed, 405 insertions(+), 601 deletions(-) delete mode 100644 patches/0001-tracing-Make-timer-tracing-actually-useful.patch delete mode 100644 patches/0001-workqueue-Add-basic-tracepoints-to-track-workqueue-e.patch create mode 100644 patches/linux-2.6.37-ahci-alpm-accounting.patch delete mode 100644 patches/linux-2.6.37-rc1-ahci-alpm-accounting.patch delete mode 100644 patches/linux-2.6.37-rc3-vfs-dirty-inode.patch create mode 100644 patches/linux-2.6.37-vfs-dirty-inode.patch diff --git a/TODO b/TODO index 6811eb2..4fe15d6 100644 --- a/TODO +++ b/TODO @@ -6,11 +6,11 @@ Needed prior to beta -- growth in memory seems to not be there.. it's just exit leaks... yawn. 
-* htmlize UTF8 mu Needed between beta and 2.0 final ---------------------------------- * MUST use the 2.6.38 new stable tracepoints where available +* htmlize UTF8 mu * more translations / strings * end user documentation * investigate tick_sched_timer and why it wakes up diff --git a/patches/0001-tracing-Make-timer-tracing-actually-useful.patch b/patches/0001-tracing-Make-timer-tracing-actually-useful.patch deleted file mode 100644 index 1d8e21b..0000000 --- a/patches/0001-tracing-Make-timer-tracing-actually-useful.patch +++ /dev/null @@ -1,64 +0,0 @@ -From fe9633af11395d339880417439a1931bb9e7e493 Mon Sep 17 00:00:00 2001 -From: Arjan van de Ven -Date: Wed, 18 Aug 2010 15:28:59 -0400 -Subject: [PATCH] tracing: Make timer tracing actually useful - -PowerTOP would like to be able to trace timers. -Unfortunately, the current timer tracing is not very useful, the actual -timer function is not recorded in the trace at the start of timer execution. - -Although this is recorded for timer "start" time (when it gets armed), this -is not useful; most timers get started early, and a tracer like PowerTOP -will never see this event, but will only see the actual running of the timer. - -This patch just adds the function to the timer tracing; I've verified with -PowerTOP that now it can get useful information about timers. 
- -Signed-off-by: Arjan van de Ven ---- - include/trace/events/timer.h | 8 ++++++-- - 1 files changed, 6 insertions(+), 2 deletions(-) - -diff --git a/include/trace/events/timer.h b/include/trace/events/timer.h -index c624126..94511c8 100644 ---- a/include/trace/events/timer.h -+++ b/include/trace/events/timer.h -@@ -81,14 +81,16 @@ TRACE_EVENT(timer_expire_entry, - TP_STRUCT__entry( - __field( void *, timer ) - __field( unsigned long, now ) -+ __field( void *, function ) - ), - - TP_fast_assign( - __entry->timer = timer; - __entry->now = jiffies; -+ __entry->function = timer->function; - ), - -- TP_printk("timer=%p now=%lu", __entry->timer, __entry->now) -+ TP_printk("timer=%p function=%pf now=%lu", __entry->timer, __entry->function,__entry->now) - ); - - /** -@@ -200,14 +202,16 @@ TRACE_EVENT(hrtimer_expire_entry, - TP_STRUCT__entry( - __field( void *, hrtimer ) - __field( s64, now ) -+ __field( void *, function ) - ), - - TP_fast_assign( - __entry->hrtimer = hrtimer; - __entry->now = now->tv64; -+ __entry->function = hrtimer->function; - ), - -- TP_printk("hrtimer=%p now=%llu", __entry->hrtimer, -+ TP_printk("hrtimer=%p function=%pf now=%llu", __entry->hrtimer, __entry->function, - (unsigned long long)ktime_to_ns((ktime_t) { .tv64 = __entry->now })) - ); - --- -1.6.1.3 - diff --git a/patches/0001-workqueue-Add-basic-tracepoints-to-track-workqueue-e.patch b/patches/0001-workqueue-Add-basic-tracepoints-to-track-workqueue-e.patch deleted file mode 100644 index 11db56b..0000000 --- a/patches/0001-workqueue-Add-basic-tracepoints-to-track-workqueue-e.patch +++ /dev/null @@ -1,129 +0,0 @@ -From 1fd789ef484066ed5583c94dc03ef066cc1c0d94 Mon Sep 17 00:00:00 2001 -From: Arjan van de Ven -Date: Sat, 21 Aug 2010 13:07:26 -0700 -Subject: [PATCH] workqueue: Add basic tracepoints to track workqueue execution - -With the introduction of the new unified work queue thread pools, -we lost one feature: It's no longer possible to know which worker -is causing the CPU to wake out of 
idle. The result is that PowerTOP -now reports a lot of "kworker/a:b" instead of more readable results. - -This patch adds a pair of tracepoints to the new workqueue code, -similar in style to the timer/hrtimer tracepoints. - -With this pair of tracepoints, the next PowerTOP can correctly -report which work item caused the wakeup (and how long it took): - -Interrupt (43) i915 time 3.51ms wakeups 141 -Work ieee80211_iface_work time 0.81ms wakeups 29 -Work do_dbs_timer time 0.55ms wakeups 24 -Process Xorg time 21.36ms wakeups 4 -Timer sched_rt_period_timer time 0.01ms wakeups 1 - -Signed-off-by: Arjan van de Ven ---- - include/trace/events/workqueue.h | 63 ++++++++++++++++++++++++++++++++++++++ - kernel/workqueue.c | 9 +++++ - 2 files changed, 72 insertions(+), 0 deletions(-) - create mode 100644 include/trace/events/workqueue.h - -diff --git a/include/trace/events/workqueue.h b/include/trace/events/workqueue.h -new file mode 100644 -index 0000000..4fd2bd1 ---- /dev/null -+++ b/include/trace/events/workqueue.h -@@ -0,0 +1,63 @@ -+#undef TRACE_SYSTEM -+#define TRACE_SYSTEM workqueue -+ -+#if !defined(_TRACE_WORKQUEUE_H) || defined(TRACE_HEADER_MULTI_READ) -+#define _TRACE_WORKQUEUE_H -+ -+#include -+#include -+ -+/** -+ * workqueue_execute_start - called immediately before the workqueue callback -+ * @work: pointer to struct work_struct -+ * -+ * Allows to track workqueue execution. -+ */ -+TRACE_EVENT(workqueue_execute_start, -+ -+ TP_PROTO(struct work_struct *work), -+ -+ TP_ARGS(work), -+ -+ TP_STRUCT__entry( -+ __field( void *, work ) -+ __field( void *, function) -+ ), -+ -+ TP_fast_assign( -+ __entry->work = work; -+ __entry->function = work->func; -+ ), -+ -+ TP_printk("work struct %p: function %pf", __entry->work, __entry->function) -+); -+ -+/** -+ * workqueue_execute_end - called immediately before the workqueue callback -+ * @work: pointer to struct work_struct -+ * -+ * Allows to track workqueue execution. 
-+ */ -+TRACE_EVENT(workqueue_execute_end, -+ -+ TP_PROTO(struct work_struct *work), -+ -+ TP_ARGS(work), -+ -+ TP_STRUCT__entry( -+ __field( void *, work ) -+ ), -+ -+ TP_fast_assign( -+ __entry->work = work; -+ ), -+ -+ TP_printk("work struct %p", __entry->work) -+); -+ -+ -+#endif /* _TRACE_WORKQUEUE_H */ -+ -+/* This part must be outside protection */ -+#include -+ -diff --git a/kernel/workqueue.c b/kernel/workqueue.c -index 2994a0e..8bd600c 100644 ---- a/kernel/workqueue.c -+++ b/kernel/workqueue.c -@@ -35,6 +35,9 @@ - #include - #include - -+#define CREATE_TRACE_POINTS -+#include -+ - #include "workqueue_sched.h" - - enum { -@@ -1790,7 +1793,13 @@ static void process_one_work(struct worker *worker, struct work_struct *work) - work_clear_pending(work); - lock_map_acquire(&cwq->wq->lockdep_map); - lock_map_acquire(&lockdep_map); -+ trace_workqueue_execute_start(work); - f(work); -+ /* -+ * While we must be careful to not use "work" after this, the trace -+ * point will only record its address. -+ */ -+ trace_workqueue_execute_end(work); - lock_map_release(&lockdep_map); - lock_map_release(&cwq->wq->lockdep_map); - --- -1.7.1.1 - diff --git a/patches/linux-2.6.37-ahci-alpm-accounting.patch b/patches/linux-2.6.37-ahci-alpm-accounting.patch new file mode 100644 index 0000000..dbe897d --- /dev/null +++ b/patches/linux-2.6.37-ahci-alpm-accounting.patch @@ -0,0 +1,302 @@ +From: Arjan van de Ven +Subject: [PATCH] libata: Add ALPM power state accounting to the AHCI driver + +PowerTOP wants to be able to show the user how effective the ALPM link +power management is for the user. ALPM is worth around 0.5W on a quiet +link; PowerTOP wants to be able to find cases where the "quiet link" isn't +actually quiet. + +This patch adds state accounting functionality to the AHCI driver for +PowerTOP to use. 
+The parts of the patch are +1) the sysfs logic of exposing the stats for each state in sysfs +2) the basic accounting logic that gets updated on link change interrupts + (or when the user accesses the info from sysfs) +3) an "accounting enable" flag; in order to get the accounting to work, + the driver needs to get phyrdy interrupts on link status changes. + Normally and currently this is disabled by the driver when ALPM is + on (to reduce overhead); when PowerTOP is running this will need + to be on to get usable statistics... hence the sysfs tunable. + +The PowerTOP output currently looks like this: + +Recent SATA AHCI link activity statistics +Active Partial Slumber Device name + 0.5% 99.5% 0.0% host0 + +(work to resolve "host0" to a more human readable name is in progress) + +Signed-off-by: Arjan van de Ven + +--- + drivers/ata/ahci.h | 15 ++++ + drivers/ata/libahci.c | 187 +++++++++++++++++++++++++++++++++++++++++++++++++- + 2 files changed, 200 insertions(+), 2 deletions(-) + +Index: linux-2.6.37/drivers/ata/ahci.h +=================================================================== +--- linux-2.6.37.orig/drivers/ata/ahci.h ++++ linux-2.6.37/drivers/ata/ahci.h +@@ -262,6 +262,13 @@ struct ahci_em_priv { + unsigned long led_state; + }; + ++enum ahci_port_states { ++ AHCI_PORT_NOLINK = 0, ++ AHCI_PORT_ACTIVE = 1, ++ AHCI_PORT_PARTIAL = 2, ++ AHCI_PORT_SLUMBER = 3 ++}; ++ + struct ahci_port_priv { + struct ata_link *active_link; + struct ahci_cmd_hdr *cmd_slot; +@@ -280,6 +287,14 @@ struct ahci_port_priv { + int fbs_last_dev; /* save FBS.DEV of last FIS */ + /* enclosure management info per PM slot */ + struct ahci_em_priv em_priv[EM_MAX_SLOTS]; ++ ++ /* ALPM accounting state and stats */ ++ unsigned int accounting_active:1; ++ u64 active_jiffies; ++ u64 partial_jiffies; ++ u64 slumber_jiffies; ++ int previous_state; ++ int previous_jiffies; + }; + + struct ahci_host_priv { +Index: linux-2.6.37/drivers/ata/libahci.c 
+=================================================================== +--- linux-2.6.37.orig/drivers/ata/libahci.c ++++ linux-2.6.37/drivers/ata/libahci.c +@@ -58,6 +58,17 @@ MODULE_PARM_DESC(ignore_sss, "Ignore sta + + static int ahci_set_lpm(struct ata_link *link, enum ata_lpm_policy policy, + unsigned hints); ++static ssize_t ahci_alpm_show_active(struct device *dev, ++ struct device_attribute *attr, char *buf); ++static ssize_t ahci_alpm_show_slumber(struct device *dev, ++ struct device_attribute *attr, char *buf); ++static ssize_t ahci_alpm_show_partial(struct device *dev, ++ struct device_attribute *attr, char *buf); ++static ssize_t ahci_alpm_show_accounting(struct device *dev, ++ struct device_attribute *attr, char *buf); ++static ssize_t ahci_alpm_set_accounting(struct device *dev, ++ struct device_attribute *attr, ++ const char *buf, size_t count); + static ssize_t ahci_led_show(struct ata_port *ap, char *buf); + static ssize_t ahci_led_store(struct ata_port *ap, const char *buf, + size_t size); +@@ -117,6 +128,12 @@ static DEVICE_ATTR(ahci_host_caps, S_IRU + static DEVICE_ATTR(ahci_host_cap2, S_IRUGO, ahci_show_host_cap2, NULL); + static DEVICE_ATTR(ahci_host_version, S_IRUGO, ahci_show_host_version, NULL); + static DEVICE_ATTR(ahci_port_cmd, S_IRUGO, ahci_show_port_cmd, NULL); ++static DEVICE_ATTR(ahci_alpm_active, S_IRUGO, ahci_alpm_show_active, NULL); ++static DEVICE_ATTR(ahci_alpm_partial, S_IRUGO, ahci_alpm_show_partial, NULL); ++static DEVICE_ATTR(ahci_alpm_slumber, S_IRUGO, ahci_alpm_show_slumber, NULL); ++static DEVICE_ATTR(ahci_alpm_accounting, S_IRUGO | S_IWUSR, ++ ahci_alpm_show_accounting, ahci_alpm_set_accounting); ++ + static DEVICE_ATTR(em_buffer, S_IWUSR | S_IRUGO, + ahci_read_em_buffer, ahci_store_em_buffer); + +@@ -128,6 +145,10 @@ struct device_attribute *ahci_shost_attr + &dev_attr_ahci_host_cap2, + &dev_attr_ahci_host_version, + &dev_attr_ahci_port_cmd, ++ &dev_attr_ahci_alpm_active, ++ &dev_attr_ahci_alpm_partial, ++ 
&dev_attr_ahci_alpm_slumber, ++ &dev_attr_ahci_alpm_accounting, + &dev_attr_em_buffer, + NULL + }; +@@ -653,9 +674,14 @@ static int ahci_set_lpm(struct ata_link + * Disable interrupts on Phy Ready. This keeps us from + * getting woken up due to spurious phy ready + * interrupts. ++ * ++ * However, when accounting_active is set, we do want ++ * the interrupts for accounting purposes. + */ +- pp->intr_mask &= ~PORT_IRQ_PHYRDY; +- writel(pp->intr_mask, port_mmio + PORT_IRQ_MASK); ++ if (!pp->accounting_active) { ++ pp->intr_mask &= ~PORT_IRQ_PHYRDY; ++ writel(pp->intr_mask, port_mmio + PORT_IRQ_MASK); ++ } + + sata_link_scr_lpm(link, policy, false); + } +@@ -1570,6 +1596,162 @@ static void ahci_error_intr(struct ata_p + ata_port_abort(ap); + } + ++static int get_current_alpm_state(struct ata_port *ap) ++{ ++ u32 status = 0; ++ ++ ahci_scr_read(&ap->link, SCR_STATUS, &status); ++ ++ /* link status is in bits 11-8 */ ++ status = status >> 8; ++ status = status & 0x7; ++ ++ if (status == 6) ++ return AHCI_PORT_SLUMBER; ++ if (status == 2) ++ return AHCI_PORT_PARTIAL; ++ if (status == 1) ++ return AHCI_PORT_ACTIVE; ++ return AHCI_PORT_NOLINK; ++} ++ ++static void account_alpm_stats(struct ata_port *ap) ++{ ++ struct ahci_port_priv *pp; ++ ++ int new_state; ++ u64 new_jiffies, jiffies_delta; ++ ++ if (ap == NULL) ++ return; ++ pp = ap->private_data; ++ ++ if (!pp) return; ++ ++ new_state = get_current_alpm_state(ap); ++ new_jiffies = jiffies; ++ ++ jiffies_delta = new_jiffies - pp->previous_jiffies; ++ ++ switch (pp->previous_state) { ++ case AHCI_PORT_NOLINK: ++ pp->active_jiffies = 0; ++ pp->partial_jiffies = 0; ++ pp->slumber_jiffies = 0; ++ break; ++ case AHCI_PORT_ACTIVE: ++ pp->active_jiffies += jiffies_delta; ++ break; ++ case AHCI_PORT_PARTIAL: ++ pp->partial_jiffies += jiffies_delta; ++ break; ++ case AHCI_PORT_SLUMBER: ++ pp->slumber_jiffies += jiffies_delta; ++ break; ++ default: ++ break; ++ } ++ pp->previous_state = new_state; ++ pp->previous_jiffies = 
new_jiffies; ++} ++ ++static ssize_t ahci_alpm_show_active(struct device *dev, ++ struct device_attribute *attr, char *buf) ++{ ++ struct Scsi_Host *shost = class_to_shost(dev); ++ struct ata_port *ap = ata_shost_to_port(shost); ++ struct ahci_port_priv *pp; ++ ++ if (!ap || ata_port_is_dummy(ap)) ++ return -EINVAL; ++ pp = ap->private_data; ++ account_alpm_stats(ap); ++ ++ return sprintf(buf, "%u\n", jiffies_to_msecs(pp->active_jiffies)); ++} ++ ++static ssize_t ahci_alpm_show_partial(struct device *dev, ++ struct device_attribute *attr, char *buf) ++{ ++ struct Scsi_Host *shost = class_to_shost(dev); ++ struct ata_port *ap = ata_shost_to_port(shost); ++ struct ahci_port_priv *pp; ++ ++ if (!ap || ata_port_is_dummy(ap)) ++ return -EINVAL; ++ ++ pp = ap->private_data; ++ account_alpm_stats(ap); ++ ++ return sprintf(buf, "%u\n", jiffies_to_msecs(pp->partial_jiffies)); ++} ++ ++static ssize_t ahci_alpm_show_slumber(struct device *dev, ++ struct device_attribute *attr, char *buf) ++{ ++ struct Scsi_Host *shost = class_to_shost(dev); ++ struct ata_port *ap = ata_shost_to_port(shost); ++ struct ahci_port_priv *pp; ++ ++ if (!ap || ata_port_is_dummy(ap)) ++ return -EINVAL; ++ ++ pp = ap->private_data; ++ ++ account_alpm_stats(ap); ++ ++ return sprintf(buf, "%u\n", jiffies_to_msecs(pp->slumber_jiffies)); ++} ++ ++static ssize_t ahci_alpm_show_accounting(struct device *dev, ++ struct device_attribute *attr, char *buf) ++{ ++ struct Scsi_Host *shost = class_to_shost(dev); ++ struct ata_port *ap = ata_shost_to_port(shost); ++ struct ahci_port_priv *pp; ++ ++ if (!ap || ata_port_is_dummy(ap)) ++ return -EINVAL; ++ ++ pp = ap->private_data; ++ ++ return sprintf(buf, "%u\n", pp->accounting_active); ++} ++ ++static ssize_t ahci_alpm_set_accounting(struct device *dev, ++ struct device_attribute *attr, ++ const char *buf, size_t count) ++{ ++ unsigned long flags; ++ struct Scsi_Host *shost = class_to_shost(dev); ++ struct ata_port *ap = ata_shost_to_port(shost); ++ struct 
ahci_port_priv *pp; ++ void __iomem *port_mmio; ++ ++ if (!ap || ata_port_is_dummy(ap)) ++ return 1; ++ ++ pp = ap->private_data; ++ port_mmio = ahci_port_base(ap); ++ ++ if (!pp) ++ return 1; ++ if (buf[0] == '0') ++ pp->accounting_active = 0; ++ if (buf[0] == '1') ++ pp->accounting_active = 1; ++ ++ /* we need to enable the PHYRDY interrupt when we want accounting */ ++ if (pp->accounting_active) { ++ spin_lock_irqsave(ap->lock, flags); ++ pp->intr_mask |= PORT_IRQ_PHYRDY; ++ writel(pp->intr_mask, port_mmio + PORT_IRQ_MASK); ++ spin_unlock_irqrestore(ap->lock, flags); ++ } ++ return count; ++} ++ ++ + static void ahci_port_intr(struct ata_port *ap) + { + void __iomem *port_mmio = ahci_port_base(ap); +@@ -1590,6 +1772,7 @@ static void ahci_port_intr(struct ata_po + /* if LPM is enabled, PHYRDY doesn't mean anything */ + if (ap->link.lpm_policy > ATA_LPM_MAX_POWER) { + status &= ~PORT_IRQ_PHYRDY; ++ account_alpm_stats(ap); + ahci_scr_write(&ap->link, SCR_ERROR, SERR_PHYRDY_CHG); + } + diff --git a/patches/linux-2.6.37-rc1-ahci-alpm-accounting.patch b/patches/linux-2.6.37-rc1-ahci-alpm-accounting.patch deleted file mode 100644 index bd451e2..0000000 --- a/patches/linux-2.6.37-rc1-ahci-alpm-accounting.patch +++ /dev/null @@ -1,302 +0,0 @@ -From: Arjan van de Ven -Subject: [PATCH] libata: Add ALPM power state accounting to the AHCI driver - -PowerTOP wants to be able to show the user how effective the ALPM link -power management is for the user. ALPM is worth around 0.5W on a quiet -link; PowerTOP wants to be able to find cases where the "quiet link" isn't -actually quiet. - -This patch adds state accounting functionality to the AHCI driver for -PowerTOP to use. 
-The parts of the patch are -1) the sysfs logic of exposing the stats for each state in sysfs -2) the basic accounting logic that gets update on link change interrupts - (or when the user accesses the info from sysfs) -3) a "accounting enable" flag; in order to get the accounting to work, - the driver needs to get phyrdy interrupts on link status changes. - Normally and currently this is disabled by the driver when ALPM is - on (to reduce overhead); when PowerTOP is running this will need - to be on to get usable statistics... hence the sysfs tunable. - -The PowerTOP output currently looks like this: - -Recent SATA AHCI link activity statistics -Active Partial Slumber Device name - 0.5% 99.5% 0.0% host0 - -(work to resolve "host0" to a more human readable name is in progress) - -Signed-off-by: Arjan van de Ven - ---- - drivers/ata/ahci.h | 15 ++++ - drivers/ata/libahci.c | 187 +++++++++++++++++++++++++++++++++++++++++++++++++- - 2 files changed, 200 insertions(+), 2 deletions(-) - -Index: linux.trees.git/drivers/ata/ahci.h -=================================================================== ---- linux.trees.git.orig/drivers/ata/ahci.h -+++ linux.trees.git/drivers/ata/ahci.h -@@ -262,6 +262,13 @@ struct ahci_em_priv { - unsigned long led_state; - }; - -+enum ahci_port_states { -+ AHCI_PORT_NOLINK = 0, -+ AHCI_PORT_ACTIVE = 1, -+ AHCI_PORT_PARTIAL = 2, -+ AHCI_PORT_SLUMBER = 3 -+}; -+ - struct ahci_port_priv { - struct ata_link *active_link; - struct ahci_cmd_hdr *cmd_slot; -@@ -280,6 +287,14 @@ struct ahci_port_priv { - int fbs_last_dev; /* save FBS.DEV of last FIS */ - /* enclosure management info per PM slot */ - struct ahci_em_priv em_priv[EM_MAX_SLOTS]; -+ -+ /* ALPM accounting state and stats */ -+ unsigned int accounting_active:1; -+ u64 active_jiffies; -+ u64 partial_jiffies; -+ u64 slumber_jiffies; -+ int previous_state; -+ int previous_jiffies; - }; - - struct ahci_host_priv { -Index: linux.trees.git/drivers/ata/libahci.c 
-=================================================================== ---- linux.trees.git.orig/drivers/ata/libahci.c -+++ linux.trees.git/drivers/ata/libahci.c -@@ -58,6 +58,17 @@ MODULE_PARM_DESC(ignore_sss, "Ignore sta - - static int ahci_set_lpm(struct ata_link *link, enum ata_lpm_policy policy, - unsigned hints); -+static ssize_t ahci_alpm_show_active(struct device *dev, -+ struct device_attribute *attr, char *buf); -+static ssize_t ahci_alpm_show_slumber(struct device *dev, -+ struct device_attribute *attr, char *buf); -+static ssize_t ahci_alpm_show_partial(struct device *dev, -+ struct device_attribute *attr, char *buf); -+static ssize_t ahci_alpm_show_accounting(struct device *dev, -+ struct device_attribute *attr, char *buf); -+static ssize_t ahci_alpm_set_accounting(struct device *dev, -+ struct device_attribute *attr, -+ const char *buf, size_t count); - static ssize_t ahci_led_show(struct ata_port *ap, char *buf); - static ssize_t ahci_led_store(struct ata_port *ap, const char *buf, - size_t size); -@@ -117,6 +128,12 @@ static DEVICE_ATTR(ahci_host_caps, S_IRU - static DEVICE_ATTR(ahci_host_cap2, S_IRUGO, ahci_show_host_cap2, NULL); - static DEVICE_ATTR(ahci_host_version, S_IRUGO, ahci_show_host_version, NULL); - static DEVICE_ATTR(ahci_port_cmd, S_IRUGO, ahci_show_port_cmd, NULL); -+static DEVICE_ATTR(ahci_alpm_active, S_IRUGO, ahci_alpm_show_active, NULL); -+static DEVICE_ATTR(ahci_alpm_partial, S_IRUGO, ahci_alpm_show_partial, NULL); -+static DEVICE_ATTR(ahci_alpm_slumber, S_IRUGO, ahci_alpm_show_slumber, NULL); -+static DEVICE_ATTR(ahci_alpm_accounting, S_IRUGO | S_IWUSR, -+ ahci_alpm_show_accounting, ahci_alpm_set_accounting); -+ - static DEVICE_ATTR(em_buffer, S_IWUSR | S_IRUGO, - ahci_read_em_buffer, ahci_store_em_buffer); - -@@ -128,6 +145,10 @@ struct device_attribute *ahci_shost_attr - &dev_attr_ahci_host_cap2, - &dev_attr_ahci_host_version, - &dev_attr_ahci_port_cmd, -+ &dev_attr_ahci_alpm_active, -+ &dev_attr_ahci_alpm_partial, -+ 
&dev_attr_ahci_alpm_slumber, -+ &dev_attr_ahci_alpm_accounting, - &dev_attr_em_buffer, - NULL - }; -@@ -653,9 +674,14 @@ static int ahci_set_lpm(struct ata_link - * Disable interrupts on Phy Ready. This keeps us from - * getting woken up due to spurious phy ready - * interrupts. -+ * -+ * However, when accounting_active is set, we do want -+ * the interrupts for accounting purposes. - */ -- pp->intr_mask &= ~PORT_IRQ_PHYRDY; -- writel(pp->intr_mask, port_mmio + PORT_IRQ_MASK); -+ if (!pp->accounting_active) { -+ pp->intr_mask &= ~PORT_IRQ_PHYRDY; -+ writel(pp->intr_mask, port_mmio + PORT_IRQ_MASK); -+ } - - sata_link_scr_lpm(link, policy, false); - } -@@ -1570,6 +1596,162 @@ static void ahci_error_intr(struct ata_p - ata_port_abort(ap); - } - -+static int get_current_alpm_state(struct ata_port *ap) -+{ -+ u32 status = 0; -+ -+ ahci_scr_read(&ap->link, SCR_STATUS, &status); -+ -+ /* link status is in bits 11-8 */ -+ status = status >> 8; -+ status = status & 0x7; -+ -+ if (status == 6) -+ return AHCI_PORT_SLUMBER; -+ if (status == 2) -+ return AHCI_PORT_PARTIAL; -+ if (status == 1) -+ return AHCI_PORT_ACTIVE; -+ return AHCI_PORT_NOLINK; -+} -+ -+static void account_alpm_stats(struct ata_port *ap) -+{ -+ struct ahci_port_priv *pp; -+ -+ int new_state; -+ u64 new_jiffies, jiffies_delta; -+ -+ if (ap == NULL) -+ return; -+ pp = ap->private_data; -+ -+ if (!pp) return; -+ -+ new_state = get_current_alpm_state(ap); -+ new_jiffies = jiffies; -+ -+ jiffies_delta = new_jiffies - pp->previous_jiffies; -+ -+ switch (pp->previous_state) { -+ case AHCI_PORT_NOLINK: -+ pp->active_jiffies = 0; -+ pp->partial_jiffies = 0; -+ pp->slumber_jiffies = 0; -+ break; -+ case AHCI_PORT_ACTIVE: -+ pp->active_jiffies += jiffies_delta; -+ break; -+ case AHCI_PORT_PARTIAL: -+ pp->partial_jiffies += jiffies_delta; -+ break; -+ case AHCI_PORT_SLUMBER: -+ pp->slumber_jiffies += jiffies_delta; -+ break; -+ default: -+ break; -+ } -+ pp->previous_state = new_state; -+ pp->previous_jiffies = 
new_jiffies; -+} -+ -+static ssize_t ahci_alpm_show_active(struct device *dev, -+ struct device_attribute *attr, char *buf) -+{ -+ struct Scsi_Host *shost = class_to_shost(dev); -+ struct ata_port *ap = ata_shost_to_port(shost); -+ struct ahci_port_priv *pp; -+ -+ if (!ap || ata_port_is_dummy(ap)) -+ return -EINVAL; -+ pp = ap->private_data; -+ account_alpm_stats(ap); -+ -+ return sprintf(buf, "%u\n", jiffies_to_msecs(pp->active_jiffies)); -+} -+ -+static ssize_t ahci_alpm_show_partial(struct device *dev, -+ struct device_attribute *attr, char *buf) -+{ -+ struct Scsi_Host *shost = class_to_shost(dev); -+ struct ata_port *ap = ata_shost_to_port(shost); -+ struct ahci_port_priv *pp; -+ -+ if (!ap || ata_port_is_dummy(ap)) -+ return -EINVAL; -+ -+ pp = ap->private_data; -+ account_alpm_stats(ap); -+ -+ return sprintf(buf, "%u\n", jiffies_to_msecs(pp->partial_jiffies)); -+} -+ -+static ssize_t ahci_alpm_show_slumber(struct device *dev, -+ struct device_attribute *attr, char *buf) -+{ -+ struct Scsi_Host *shost = class_to_shost(dev); -+ struct ata_port *ap = ata_shost_to_port(shost); -+ struct ahci_port_priv *pp; -+ -+ if (!ap || ata_port_is_dummy(ap)) -+ return -EINVAL; -+ -+ pp = ap->private_data; -+ -+ account_alpm_stats(ap); -+ -+ return sprintf(buf, "%u\n", jiffies_to_msecs(pp->slumber_jiffies)); -+} -+ -+static ssize_t ahci_alpm_show_accounting(struct device *dev, -+ struct device_attribute *attr, char *buf) -+{ -+ struct Scsi_Host *shost = class_to_shost(dev); -+ struct ata_port *ap = ata_shost_to_port(shost); -+ struct ahci_port_priv *pp; -+ -+ if (!ap || ata_port_is_dummy(ap)) -+ return -EINVAL; -+ -+ pp = ap->private_data; -+ -+ return sprintf(buf, "%u\n", pp->accounting_active); -+} -+ -+static ssize_t ahci_alpm_set_accounting(struct device *dev, -+ struct device_attribute *attr, -+ const char *buf, size_t count) -+{ -+ unsigned long flags; -+ struct Scsi_Host *shost = class_to_shost(dev); -+ struct ata_port *ap = ata_shost_to_port(shost); -+ struct 
ahci_port_priv *pp; -+ void __iomem *port_mmio; -+ -+ if (!ap || ata_port_is_dummy(ap)) -+ return 1; -+ -+ pp = ap->private_data; -+ port_mmio = ahci_port_base(ap); -+ -+ if (!pp) -+ return 1; -+ if (buf[0] == '0') -+ pp->accounting_active = 0; -+ if (buf[0] == '1') -+ pp->accounting_active = 1; -+ -+ /* we need to enable the PHYRDY interrupt when we want accounting */ -+ if (pp->accounting_active) { -+ spin_lock_irqsave(ap->lock, flags); -+ pp->intr_mask |= PORT_IRQ_PHYRDY; -+ writel(pp->intr_mask, port_mmio + PORT_IRQ_MASK); -+ spin_unlock_irqrestore(ap->lock, flags); -+ } -+ return count; -+} -+ -+ - static void ahci_port_intr(struct ata_port *ap) - { - void __iomem *port_mmio = ahci_port_base(ap); -@@ -1590,6 +1772,7 @@ static void ahci_port_intr(struct ata_po - /* if LPM is enabled, PHYRDY doesn't mean anything */ - if (ap->link.lpm_policy > ATA_LPM_MAX_POWER) { - status &= ~PORT_IRQ_PHYRDY; -+ account_alpm_stats(ap); - ahci_scr_write(&ap->link, SCR_ERROR, SERR_PHYRDY_CHG); - } - diff --git a/patches/linux-2.6.37-rc3-vfs-dirty-inode.patch b/patches/linux-2.6.37-rc3-vfs-dirty-inode.patch deleted file mode 100644 index 9f430c4..0000000 --- a/patches/linux-2.6.37-rc3-vfs-dirty-inode.patch +++ /dev/null @@ -1,105 +0,0 @@ -From 3950d3c04a6bf8ccf9ff912a49bdd242a2fe9e47 Mon Sep 17 00:00:00 2001 -From: Arjan van de Ven -Date: Fri, 26 Nov 2010 12:18:03 -0800 -Subject: [PATCH] vfs: Add a trace point in the mark_inode_dirty function - -PowerTOP would like to be able to show who is keeping the disk -busy by dirtying data. The most logical spot for this is in the vfs -in the mark_inode_dirty() function, doing this on the block level -is not possible because by the time the IO hits the block layer the -guilty party can no longer be found ("kjournald" and "pdflush" are not -useful answers to "who caused this file to be dirty). 
- -The trace point follows the same logic/style as the block_dump code -and pretty much dumps the same data, just not to dmesg (and thus to -/var/log/messages) but via the trace events streams. - -Eventually we should be able to phase out the block dump code, but that's -for later on after a transition time. - -Signed-of-by: Arjan van de Ven ---- - fs/fs-writeback.c | 3 +++ - include/linux/fs.h | 12 ++++++++++++ - include/trace/events/writeback.h | 28 ++++++++++++++++++++++++++++ - 3 files changed, 43 insertions(+), 0 deletions(-) - -diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c -index 3d06ccc..62e33cc 100644 ---- a/fs/fs-writeback.c -+++ b/fs/fs-writeback.c -@@ -952,6 +952,9 @@ void __mark_inode_dirty(struct inode *inode, int flags) - if ((inode->i_state & flags) == flags) - return; - -+ if (flags & (I_DIRTY_SYNC | I_DIRTY_DATASYNC | I_DIRTY_PAGES)) -+ trace_writeback_inode_dirty(inode, flags); -+ - if (unlikely(block_dump)) - block_dump___mark_inode_dirty(inode); - -diff --git a/include/linux/fs.h b/include/linux/fs.h -index c9e06cc..25935e1 100644 ---- a/include/linux/fs.h -+++ b/include/linux/fs.h -@@ -1676,6 +1676,18 @@ struct super_operations { - - #define I_DIRTY (I_DIRTY_SYNC | I_DIRTY_DATASYNC | I_DIRTY_PAGES) - -+#define INODE_DIRTY_FLAGS \ -+ { I_DIRTY_SYNC, "DIRTY-SYNC" }, \ -+ { I_DIRTY_DATASYNC, "DIRTY-DATASYNC" }, \ -+ { I_DIRTY_PAGES, "DIRTY-PAGES" }, \ -+ { I_NEW, "NEW" }, \ -+ { I_WILL_FREE, "WILL-FREE" }, \ -+ { I_FREEING, "FREEING" }, \ -+ { I_CLEAR, "CLEAR" }, \ -+ { I_SYNC, "SYNC" }, \ -+ { I_REFERENCED, "REFERENCED" } -+ -+ - extern void __mark_inode_dirty(struct inode *, int); - static inline void mark_inode_dirty(struct inode *inode) - { -diff --git a/include/trace/events/writeback.h b/include/trace/events/writeback.h -index 89a2b2d..5c80875 100644 ---- a/include/trace/events/writeback.h -+++ b/include/trace/events/writeback.h -@@ -186,6 +186,34 @@ DEFINE_EVENT(writeback_congest_waited_template, writeback_wait_iff_congested, - 
TP_ARGS(usec_timeout, usec_delayed) - ); - -+/* -+ * Tracepoint for dirtying an inode; used by PowerTOP -+ */ -+TRACE_EVENT(writeback_inode_dirty, -+ -+ TP_PROTO(struct inode *inode, int flags), -+ -+ TP_ARGS(inode, flags), -+ -+ TP_STRUCT__entry( -+ __field( __kernel_dev_t, dev ) -+ __field( ino_t, ino ) -+ __field( u32, flags ) -+ ), -+ -+ TP_fast_assign( -+ __entry->dev = inode->i_sb->s_dev; -+ __entry->ino = inode->i_ino; -+ __entry->flags = flags; -+ ), -+ -+ TP_printk("dev %d:%d ino %lu flags %d %s", MAJOR(__entry->dev), MINOR(__entry->dev), -+ (unsigned long) __entry->ino, -+ __entry->flags, -+ __print_flags(__entry->flags, "|", INODE_DIRTY_FLAGS) -+ ) -+); -+ - #endif /* _TRACE_WRITEBACK_H */ - - /* This part must be outside protection */ --- -1.7.2.3 - diff --git a/patches/linux-2.6.37-vfs-dirty-inode.patch b/patches/linux-2.6.37-vfs-dirty-inode.patch new file mode 100644 index 0000000..c218291 --- /dev/null +++ b/patches/linux-2.6.37-vfs-dirty-inode.patch @@ -0,0 +1,102 @@ +From 3950d3c04a6bf8ccf9ff912a49bdd242a2fe9e47 Mon Sep 17 00:00:00 2001 +From: Arjan van de Ven +Date: Fri, 26 Nov 2010 12:18:03 -0800 +Subject: [PATCH] vfs: Add a trace point in the mark_inode_dirty function + +PowerTOP would like to be able to show who is keeping the disk +busy by dirtying data. The most logical spot for this is in the vfs +in the mark_inode_dirty() function, doing this on the block level +is not possible because by the time the IO hits the block layer the +guilty party can no longer be found ("kjournald" and "pdflush" are not +useful answers to "who caused this file to be dirty"). + +The trace point follows the same logic/style as the block_dump code +and pretty much dumps the same data, just not to dmesg (and thus to +/var/log/messages) but via the trace events streams. + +Eventually we should be able to phase out the block dump code, but that's +for later on after a transition time. 
+ +Signed-off-by: Arjan van de Ven +--- + fs/fs-writeback.c | 3 +++ + include/linux/fs.h | 12 ++++++++++++ + include/trace/events/writeback.h | 28 ++++++++++++++++++++++++++++ + 3 files changed, 43 insertions(+) + +Index: linux-2.6.37/fs/fs-writeback.c +=================================================================== +--- linux-2.6.37.orig/fs/fs-writeback.c ++++ linux-2.6.37/fs/fs-writeback.c +@@ -952,6 +952,9 @@ void __mark_inode_dirty(struct inode *in + if ((inode->i_state & flags) == flags) + return; + ++ if (flags & (I_DIRTY_SYNC | I_DIRTY_DATASYNC | I_DIRTY_PAGES)) ++ trace_writeback_inode_dirty(inode, flags); ++ + if (unlikely(block_dump)) + block_dump___mark_inode_dirty(inode); + +Index: linux-2.6.37/include/linux/fs.h +=================================================================== +--- linux-2.6.37.orig/include/linux/fs.h ++++ linux-2.6.37/include/linux/fs.h +@@ -1677,6 +1677,18 @@ struct super_operations { + + #define I_DIRTY (I_DIRTY_SYNC | I_DIRTY_DATASYNC | I_DIRTY_PAGES) + ++#define INODE_DIRTY_FLAGS \ ++ { I_DIRTY_SYNC, "DIRTY-SYNC" }, \ ++ { I_DIRTY_DATASYNC, "DIRTY-DATASYNC" }, \ ++ { I_DIRTY_PAGES, "DIRTY-PAGES" }, \ ++ { I_NEW, "NEW" }, \ ++ { I_WILL_FREE, "WILL-FREE" }, \ ++ { I_FREEING, "FREEING" }, \ ++ { I_CLEAR, "CLEAR" }, \ ++ { I_SYNC, "SYNC" }, \ ++ { I_REFERENCED, "REFERENCED" } ++ ++ + extern void __mark_inode_dirty(struct inode *, int); + static inline void mark_inode_dirty(struct inode *inode) + { +Index: linux-2.6.37/include/trace/events/writeback.h +=================================================================== +--- linux-2.6.37.orig/include/trace/events/writeback.h ++++ linux-2.6.37/include/trace/events/writeback.h +@@ -186,6 +186,34 @@ DEFINE_EVENT(writeback_congest_waited_te + TP_ARGS(usec_timeout, usec_delayed) + ); + ++/* ++ * Tracepoint for dirtying an inode; used by PowerTOP ++ */ ++TRACE_EVENT(writeback_inode_dirty, ++ ++ TP_PROTO(struct inode *inode, int flags), ++ ++ TP_ARGS(inode, flags), ++ ++ 
TP_STRUCT__entry( ++ __field( __kernel_dev_t, dev ) ++ __field( ino_t, ino ) ++ __field( u32, flags ) ++ ), ++ ++ TP_fast_assign( ++ __entry->dev = inode->i_sb->s_dev; ++ __entry->ino = inode->i_ino; ++ __entry->flags = flags; ++ ), ++ ++ TP_printk("dev %d:%d ino %lu flags %d %s", MAJOR(__entry->dev), MINOR(__entry->dev), ++ (unsigned long) __entry->ino, ++ __entry->flags, ++ __print_flags(__entry->flags, "|", INODE_DIRTY_FLAGS) ++ ) ++); ++ + #endif /* _TRACE_WRITEBACK_H */ + + /* This part must be outside protection */ -- cgit v1.2.3