/* Copyright (c) 2018, The Linux Foundation. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 and
 * only version 2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * RMNET Data Smart Hash Workqueue solution
 *
 */

#include "rmnet_shs.h"
#include <linux/module.h>
#include <linux/moduleparam.h>

MODULE_LICENSE("GPL v2");

/* Local Macros */
#define RMNET_SHS_RX_BPNSEC_TO_BPSEC(x) ((x)*1000000000)
#define RMNET_SHS_SEC_TO_NSEC(x) ((x)*1000000000)
#define RMNET_SHS_NSEC_TO_SEC(x) ((x)/1000000000)
#define RMNET_SHS_BYTE_TO_BIT(x) ((x)*8)
#define RMNET_SHS_MIN_HSTAT_NODES_REQD 16
#define RMNET_SHS_WQ_DELAY_TICKS 10

#define PERIODIC_CLEAN 0
/* FORCE_CLEAN should only be used during module de-init. */
#define FORCE_CLEAN 1

/* Time to wait (in time ticks) before re-triggering the workqueue
 * 1 tick = 10 ms (maximum possible resolution)
 * 100 ticks = 1 second
 */

/* Local Definitions and Declarations */
unsigned int rmnet_shs_wq_frequency __read_mostly = RMNET_SHS_WQ_DELAY_TICKS;
module_param(rmnet_shs_wq_frequency, uint, 0644);
MODULE_PARM_DESC(rmnet_shs_wq_frequency, "Periodicity of Wq trigger (in ticks)");

unsigned long rmnet_shs_max_flow_inactivity_sec __read_mostly =
						RMNET_SHS_MAX_SKB_INACTIVE_TSEC;
module_param(rmnet_shs_max_flow_inactivity_sec, ulong, 0644);
MODULE_PARM_DESC(rmnet_shs_max_flow_inactivity_sec,
		 "Max flow inactive time before clean up");

unsigned int rmnet_shs_wq_tuning __read_mostly = 80;
module_param(rmnet_shs_wq_tuning, uint, 0644);
MODULE_PARM_DESC(rmnet_shs_wq_tuning, "Moving average weightage");

unsigned long long rmnet_shs_cpu_rx_max_pps_thresh[MAX_CPUS] __read_mostly = {
			RMNET_SHS_UDP_PPS_LPWR_CPU_UTHRESH,
			RMNET_SHS_UDP_PPS_LPWR_CPU_UTHRESH,
			RMNET_SHS_UDP_PPS_LPWR_CPU_UTHRESH,
			RMNET_SHS_UDP_PPS_LPWR_CPU_UTHRESH,
			RMNET_SHS_UDP_PPS_PERF_CPU_UTHRESH,
			RMNET_SHS_UDP_PPS_PERF_CPU_UTHRESH,
			RMNET_SHS_UDP_PPS_PERF_CPU_UTHRESH,
			RMNET_SHS_UDP_PPS_PERF_CPU_UTHRESH};
module_param_array(rmnet_shs_cpu_rx_max_pps_thresh, ullong, 0, 0644);
MODULE_PARM_DESC(rmnet_shs_cpu_rx_max_pps_thresh, "Max pkts core can handle");

unsigned long long rmnet_shs_cpu_rx_min_pps_thresh[MAX_CPUS] __read_mostly = {
			RMNET_SHS_UDP_PPS_LPWR_CPU_LTHRESH,
			RMNET_SHS_UDP_PPS_LPWR_CPU_LTHRESH,
			RMNET_SHS_UDP_PPS_LPWR_CPU_LTHRESH,
			RMNET_SHS_UDP_PPS_LPWR_CPU_LTHRESH,
			RMNET_SHS_UDP_PPS_PERF_CPU_LTHRESH,
			RMNET_SHS_UDP_PPS_PERF_CPU_LTHRESH,
			RMNET_SHS_UDP_PPS_PERF_CPU_LTHRESH,
			RMNET_SHS_UDP_PPS_PERF_CPU_LTHRESH};
module_param_array(rmnet_shs_cpu_rx_min_pps_thresh, ullong, 0, 0644);
MODULE_PARM_DESC(rmnet_shs_cpu_rx_min_pps_thresh, "Min pkts core can handle");

unsigned int rmnet_shs_cpu_rx_flows[MAX_CPUS];
module_param_array(rmnet_shs_cpu_rx_flows, uint, 0, 0444);
MODULE_PARM_DESC(rmnet_shs_cpu_rx_flows, "Num flows processed per core");

unsigned long long rmnet_shs_cpu_rx_bytes[MAX_CPUS];
module_param_array(rmnet_shs_cpu_rx_bytes, ullong, 0, 0444);
MODULE_PARM_DESC(rmnet_shs_cpu_rx_bytes, "SHS stamp bytes per CPU");

unsigned long long rmnet_shs_cpu_rx_pkts[MAX_CPUS];
module_param_array(rmnet_shs_cpu_rx_pkts, ullong, 0, 0444);
MODULE_PARM_DESC(rmnet_shs_cpu_rx_pkts, "SHS stamp total pkts per CPU");
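/* The read-only arrays above and below are debug snapshots refreshed on
 * every workqueue pass. Assuming the driver is built as a module named
 * rmnet_shs (name used here only for illustration), they can be inspected
 * from userspace through /sys/module/rmnet_shs/parameters/, e.g. the
 * rmnet_shs_cpu_rx_pps entry holds the most recently stamped per-CPU
 * packet enqueue rate.
 */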
unsigned long long rmnet_shs_cpu_rx_bps[MAX_CPUS];
module_param_array(rmnet_shs_cpu_rx_bps, ullong, 0, 0444);
MODULE_PARM_DESC(rmnet_shs_cpu_rx_bps, "SHS stamp enq rate per CPU");

unsigned long long rmnet_shs_cpu_rx_pps[MAX_CPUS];
module_param_array(rmnet_shs_cpu_rx_pps, ullong, 0, 0444);
MODULE_PARM_DESC(rmnet_shs_cpu_rx_pps, "SHS stamp pkt enq rate per CPU");

unsigned long rmnet_shs_flow_hash[MAX_SUPPORTED_FLOWS_DEBUG];
module_param_array(rmnet_shs_flow_hash, ulong, 0, 0444);
MODULE_PARM_DESC(rmnet_shs_flow_hash, "SHS stamp hash flow");

unsigned long rmnet_shs_flow_proto[MAX_SUPPORTED_FLOWS_DEBUG];
module_param_array(rmnet_shs_flow_proto, ulong, 0, 0444);
MODULE_PARM_DESC(rmnet_shs_flow_proto, "SHS stamp hash transport protocol");

unsigned long long rmnet_shs_flow_inactive_tsec[MAX_SUPPORTED_FLOWS_DEBUG];
module_param_array(rmnet_shs_flow_inactive_tsec, ullong, 0, 0444);
MODULE_PARM_DESC(rmnet_shs_flow_inactive_tsec, "SHS stamp inactive flow time");

int rmnet_shs_flow_cpu[MAX_SUPPORTED_FLOWS_DEBUG] = {
			-1, -1, -1, -1, -1, -1, -1, -1,
			-1, -1, -1, -1, -1, -1, -1, -1};
module_param_array(rmnet_shs_flow_cpu, int, NULL, 0444);
MODULE_PARM_DESC(rmnet_shs_flow_cpu, "SHS stamp flow processing CPU");

int rmnet_shs_flow_cpu_recommended[MAX_SUPPORTED_FLOWS_DEBUG] = {
			-1, -1, -1, -1, -1, -1, -1, -1,
			-1, -1, -1, -1, -1, -1, -1, -1};
module_param_array(rmnet_shs_flow_cpu_recommended, int, NULL, 0444);
MODULE_PARM_DESC(rmnet_shs_flow_cpu_recommended, "SHS stamp flow proc CPU");

unsigned long long rmnet_shs_flow_rx_bytes[MAX_SUPPORTED_FLOWS_DEBUG];
module_param_array(rmnet_shs_flow_rx_bytes, ullong, 0, 0444);
MODULE_PARM_DESC(rmnet_shs_flow_rx_bytes, "SHS stamp bytes per flow");

unsigned long long rmnet_shs_flow_rx_pkts[MAX_SUPPORTED_FLOWS_DEBUG];
module_param_array(rmnet_shs_flow_rx_pkts, ullong, 0, 0444);
MODULE_PARM_DESC(rmnet_shs_flow_rx_pkts, "SHS stamp total pkts per flow");

unsigned long long rmnet_shs_flow_rx_bps[MAX_SUPPORTED_FLOWS_DEBUG];
module_param_array(rmnet_shs_flow_rx_bps, ullong, 0, 0444);
MODULE_PARM_DESC(rmnet_shs_flow_rx_bps, "SHS stamp enq rate per flow");

unsigned long long rmnet_shs_flow_rx_pps[MAX_SUPPORTED_FLOWS_DEBUG];
module_param_array(rmnet_shs_flow_rx_pps, ullong, 0, 0444);
MODULE_PARM_DESC(rmnet_shs_flow_rx_pps, "SHS stamp pkt enq rate per flow");

static spinlock_t rmnet_shs_wq_splock;
static DEFINE_SPINLOCK(rmnet_shs_hstat_tbl_lock);
static time_t rmnet_shs_wq_tnsec;

static struct workqueue_struct *rmnet_shs_wq;
static struct rmnet_shs_delay_wq_s *rmnet_shs_delayed_wq;
static struct rmnet_shs_wq_rx_flow_s rmnet_shs_rx_flow_tbl;
static struct list_head rmnet_shs_wq_hstat_tbl =
				LIST_HEAD_INIT(rmnet_shs_wq_hstat_tbl);
static int rmnet_shs_flow_dbg_stats_idx_cnt;
static struct list_head rmnet_shs_wq_ep_tbl =
				LIST_HEAD_INIT(rmnet_shs_wq_ep_tbl);

/* Helper functions to add and remove entries to the table
 * that maintains a list of all endpoints (vnd's) available on this device.
 */
void rmnet_shs_wq_ep_tbl_add(struct rmnet_shs_wq_ep_s *ep)
{
	unsigned long flags;

	trace_rmnet_shs_wq_low(RMNET_SHS_WQ_EP_TBL, RMNET_SHS_WQ_EP_TBL_ADD,
			       0xDEF, 0xDEF, 0xDEF, 0xDEF, ep, NULL);
	spin_lock_irqsave(&rmnet_shs_hstat_tbl_lock, flags);
	list_add(&ep->ep_list_id, &rmnet_shs_wq_ep_tbl);
	spin_unlock_irqrestore(&rmnet_shs_hstat_tbl_lock, flags);
}

void rmnet_shs_wq_ep_tbl_remove(struct rmnet_shs_wq_ep_s *ep)
{
	unsigned long flags;

	trace_rmnet_shs_wq_low(RMNET_SHS_WQ_EP_TBL, RMNET_SHS_WQ_EP_TBL_DEL,
			       0xDEF, 0xDEF, 0xDEF, 0xDEF, ep, NULL);
	spin_lock_irqsave(&rmnet_shs_hstat_tbl_lock, flags);
	list_del_init(&ep->ep_list_id);
	spin_unlock_irqrestore(&rmnet_shs_hstat_tbl_lock, flags);
}

/* Helper functions to add and remove entries to the table
 * that maintains a list of all nodes that maintain statistics per flow
 */
void rmnet_shs_wq_hstat_tbl_add(struct rmnet_shs_wq_hstat_s *hnode)
{
	unsigned long flags;

	trace_rmnet_shs_wq_low(RMNET_SHS_WQ_HSTAT_TBL,
			       RMNET_SHS_WQ_HSTAT_TBL_ADD,
			       0xDEF, 0xDEF, 0xDEF, 0xDEF, hnode, NULL);
	spin_lock_irqsave(&rmnet_shs_hstat_tbl_lock, flags);
	list_add(&hnode->hstat_node_id, &rmnet_shs_wq_hstat_tbl);
	spin_unlock_irqrestore(&rmnet_shs_hstat_tbl_lock, flags);
}

void rmnet_shs_wq_hstat_tbl_remove(struct rmnet_shs_wq_hstat_s *hnode)
{
	unsigned long flags;

	trace_rmnet_shs_wq_low(RMNET_SHS_WQ_HSTAT_TBL,
			       RMNET_SHS_WQ_HSTAT_TBL_DEL,
			       0xDEF, 0xDEF, 0xDEF, 0xDEF, hnode, NULL);
	spin_lock_irqsave(&rmnet_shs_hstat_tbl_lock, flags);
	list_del_init(&hnode->hstat_node_id);
	spin_unlock_irqrestore(&rmnet_shs_hstat_tbl_lock, flags);
}

/* We maintain a list of all flow nodes processed by a cpu.
 * The helper functions below are used to maintain the flow <=> cpu
 * association.
 */
void rmnet_shs_wq_cpu_list_remove(struct rmnet_shs_wq_hstat_s *hnode)
{
	unsigned long flags;

	trace_rmnet_shs_wq_low(RMNET_SHS_WQ_CPU_HSTAT_TBL,
			       RMNET_SHS_WQ_CPU_HSTAT_TBL_DEL,
			       0xDEF, 0xDEF, 0xDEF, 0xDEF, hnode, NULL);
	spin_lock_irqsave(&rmnet_shs_hstat_tbl_lock, flags);
	list_del_init(&hnode->cpu_node_id);
	spin_unlock_irqrestore(&rmnet_shs_hstat_tbl_lock, flags);
}

void rmnet_shs_wq_cpu_list_add(struct rmnet_shs_wq_hstat_s *hnode,
			       struct list_head *head)
{
	unsigned long flags;

	trace_rmnet_shs_wq_low(RMNET_SHS_WQ_CPU_HSTAT_TBL,
			       RMNET_SHS_WQ_CPU_HSTAT_TBL_ADD,
			       0xDEF, 0xDEF, 0xDEF, 0xDEF, hnode, NULL);
	spin_lock_irqsave(&rmnet_shs_hstat_tbl_lock, flags);
	list_add(&hnode->cpu_node_id, head);
	spin_unlock_irqrestore(&rmnet_shs_hstat_tbl_lock, flags);
}

void rmnet_shs_wq_cpu_list_move(struct rmnet_shs_wq_hstat_s *hnode,
				struct list_head *head)
{
	unsigned long flags;

	trace_rmnet_shs_wq_low(RMNET_SHS_WQ_CPU_HSTAT_TBL,
			       RMNET_SHS_WQ_CPU_HSTAT_TBL_MOVE,
			       hnode->current_cpu, 0xDEF, 0xDEF, 0xDEF,
			       hnode, NULL);
	spin_lock_irqsave(&rmnet_shs_hstat_tbl_lock, flags);
	list_move(&hnode->cpu_node_id, head);
	spin_unlock_irqrestore(&rmnet_shs_hstat_tbl_lock, flags);
}

/* Resets all the parameters used to maintain hash statistics */
void rmnet_shs_wq_hstat_reset_node(struct rmnet_shs_wq_hstat_s *hnode)
{
	hnode->c_epoch = 0;
	hnode->l_epoch = 0;
	hnode->node = NULL;
	hnode->inactive_duration = 0;
	hnode->rx_skb = 0;
	hnode->rx_bytes = 0;
	hnode->rx_pps = 0;
	hnode->rx_bps = 0;
	hnode->last_rx_skb = 0;
	hnode->last_rx_bytes = 0;
	hnode->rps_config_msk = 0;
	hnode->current_core_msk = 0;
	hnode->def_core_msk = 0;
	hnode->pri_core_msk = 0;
	hnode->available_core_msk = 0;
	hnode->hash = 0;
	hnode->suggested_cpu = 0;
	hnode->current_cpu = 0;
	hnode->skb_tport_proto = 0;
	hnode->stat_idx = -1;
	INIT_LIST_HEAD(&hnode->cpu_node_id);
	hnode->is_new_flow = 0;
	/* Clear the in-use
	 * flag as a last action. This is required to ensure
	 * the same node does not get allocated until all the parameters
	 * are cleared.
	 */
	hnode->in_use = 0;

	trace_rmnet_shs_wq_low(RMNET_SHS_WQ_HSTAT_TBL,
			       RMNET_SHS_WQ_HSTAT_TBL_NODE_RESET,
			       hnode->is_perm, 0xDEF, 0xDEF, 0xDEF,
			       hnode, NULL);
}

/* Preallocates a set of flow nodes that maintain flow level statistics */
void rmnet_shs_wq_hstat_alloc_nodes(u8 num_nodes_to_allocate, u8 is_store_perm)
{
	struct rmnet_shs_wq_hstat_s *hnode = NULL;

	while (num_nodes_to_allocate > 0) {
		hnode = kzalloc(sizeof(*hnode), 0);
		if (hnode) {
			hnode->is_perm = is_store_perm;
			rmnet_shs_wq_hstat_reset_node(hnode);
			INIT_LIST_HEAD(&hnode->hstat_node_id);
			INIT_LIST_HEAD(&hnode->cpu_node_id);
			rmnet_shs_wq_hstat_tbl_add(hnode);
		} else {
			rmnet_shs_crit_err[RMNET_SHS_WQ_ALLOC_HSTAT_ERR]++;
		}

		hnode = NULL;
		num_nodes_to_allocate--;
	}
}

/* If there is an already pre-allocated node available and not in use,
 * we will try to re-use it.
 */
struct rmnet_shs_wq_hstat_s *rmnet_shs_wq_get_new_hstat_node(void)
{
	struct rmnet_shs_wq_hstat_s *hnode;
	struct rmnet_shs_wq_hstat_s *ret_node = NULL;
	unsigned long flags;

	spin_lock_irqsave(&rmnet_shs_hstat_tbl_lock, flags);
	list_for_each_entry(hnode, &rmnet_shs_wq_hstat_tbl, hstat_node_id) {
		if (hnode == NULL)
			continue;

		if (hnode->in_use == 0) {
			ret_node = hnode;
			ret_node->in_use = 1;
			ret_node->is_new_flow = 1;
			break;
		}
	}
	spin_unlock_irqrestore(&rmnet_shs_hstat_tbl_lock, flags);

	if (ret_node) {
		trace_rmnet_shs_wq_low(RMNET_SHS_WQ_HSTAT_TBL,
				       RMNET_SHS_WQ_HSTAT_TBL_NODE_REUSE,
				       hnode->is_perm, 0xDEF, 0xDEF, 0xDEF,
				       hnode, NULL);
		return ret_node;
	}

	/* We have reached a point where all pre-allocated nodes are in use.
	 * Allocate memory to maintain the flow level stats for the new flow.
	 * However, this newly allocated memory will be released as soon as we
	 * realize that this flow is inactive.
	 */
	ret_node = kzalloc(sizeof(*hnode), 0);
	if (!ret_node) {
		rmnet_shs_crit_err[RMNET_SHS_WQ_ALLOC_HSTAT_ERR]++;
		return NULL;
	}

	rmnet_shs_wq_hstat_reset_node(ret_node);
	ret_node->is_perm = 0;
	ret_node->in_use = 1;
	ret_node->is_new_flow = 1;
	INIT_LIST_HEAD(&ret_node->hstat_node_id);
	INIT_LIST_HEAD(&ret_node->cpu_node_id);

	trace_rmnet_shs_wq_low(RMNET_SHS_WQ_HSTAT_TBL,
			       RMNET_SHS_WQ_HSTAT_TBL_NODE_DYN_ALLOCATE,
			       ret_node->is_perm, 0xDEF, 0xDEF, 0xDEF,
			       ret_node, NULL);
	rmnet_shs_wq_hstat_tbl_add(ret_node);

	return ret_node;
}

void rmnet_shs_wq_create_new_flow(struct rmnet_shs_skbn_s *node_p)
{
	struct timespec time;

	node_p->hstats = rmnet_shs_wq_get_new_hstat_node();
	if (node_p->hstats != NULL) {
		(void)getnstimeofday(&time);
		node_p->hstats->hash = node_p->hash;
		node_p->hstats->skb_tport_proto = node_p->skb_tport_proto;
		node_p->hstats->current_cpu = node_p->map_cpu;
		node_p->hstats->suggested_cpu = node_p->map_cpu;
		node_p->hstats->node = node_p;
		node_p->hstats->c_epoch = RMNET_SHS_SEC_TO_NSEC(time.tv_sec) +
					  time.tv_nsec;
		node_p->hstats->l_epoch = RMNET_SHS_SEC_TO_NSEC(time.tv_sec) +
					  time.tv_nsec;
	}

	trace_rmnet_shs_wq_high(RMNET_SHS_WQ_HSTAT_TBL,
				RMNET_SHS_WQ_HSTAT_TBL_NODE_NEW_REQ,
				0xDEF, 0xDEF, 0xDEF, 0xDEF,
				node_p, node_p->hstats);
}

/* Refresh the RPS mask associated with this flow */
void rmnet_shs_wq_update_hstat_rps_msk(struct rmnet_shs_wq_hstat_s *hstat_p)
{
	struct rmnet_shs_skbn_s *node_p;
	struct rmnet_shs_wq_ep_s *ep;

	node_p = hstat_p->node;

	/* Map the RPS mask from the endpoint associated with this flow */
	list_for_each_entry(ep, &rmnet_shs_wq_ep_tbl, ep_list_id) {
		if (ep && (node_p->dev == ep->ep->egress_dev)) {
			hstat_p->rps_config_msk = ep->rps_config_msk;
			hstat_p->def_core_msk = ep->default_core_msk;
			hstat_p->pri_core_msk = ep->pri_core_msk;
			break;
		}
	}

	trace_rmnet_shs_wq_low(RMNET_SHS_WQ_FLOW_STATS,
			       RMNET_SHS_WQ_FLOW_STATS_UPDATE_MSK,
			       hstat_p->rps_config_msk, hstat_p->def_core_msk,
			       hstat_p->pri_core_msk, 0xDEF, hstat_p, node_p);
}

void rmnet_shs_wq_update_hash_stats_debug(struct rmnet_shs_wq_hstat_s *hstats_p,
					  struct rmnet_shs_skbn_s *node_p)
{
	int idx = rmnet_shs_flow_dbg_stats_idx_cnt;

	if (!rmnet_shs_stats_enabled)
		return;

	if (hstats_p->stat_idx < 0) {
		idx = idx % MAX_SUPPORTED_FLOWS_DEBUG;
		hstats_p->stat_idx = idx;
		rmnet_shs_flow_dbg_stats_idx_cnt++;
	}

	rmnet_shs_flow_hash[hstats_p->stat_idx] = hstats_p->hash;
	rmnet_shs_flow_proto[hstats_p->stat_idx] = node_p->skb_tport_proto;
	rmnet_shs_flow_inactive_tsec[hstats_p->stat_idx] =
			RMNET_SHS_NSEC_TO_SEC(hstats_p->inactive_duration);
	rmnet_shs_flow_rx_bps[hstats_p->stat_idx] = hstats_p->rx_bps;
	rmnet_shs_flow_rx_pps[hstats_p->stat_idx] = hstats_p->rx_pps;
	rmnet_shs_flow_rx_bytes[hstats_p->stat_idx] = hstats_p->rx_bytes;
	rmnet_shs_flow_rx_pkts[hstats_p->stat_idx] = hstats_p->rx_skb;
	rmnet_shs_flow_cpu[hstats_p->stat_idx] = hstats_p->current_cpu;
	rmnet_shs_flow_cpu_recommended[hstats_p->stat_idx] =
			hstats_p->suggested_cpu;
}

/* Returns TRUE if this flow received a new packet, FALSE otherwise */
u8 rmnet_shs_wq_is_hash_rx_new_pkt(struct rmnet_shs_wq_hstat_s *hstats_p,
				   struct rmnet_shs_skbn_s *node_p)
{
	if (node_p->num_skb == hstats_p->rx_skb)
		return 0;

	return 1;
}

void rmnet_shs_wq_update_hash_tinactive(struct rmnet_shs_wq_hstat_s *hstats_p,
					struct rmnet_shs_skbn_s *node_p)
{
	time_t tdiff;

	tdiff = rmnet_shs_wq_tnsec - hstats_p->c_epoch;
	hstats_p->inactive_duration = tdiff;

	trace_rmnet_shs_wq_low(RMNET_SHS_WQ_FLOW_STATS,
			       RMNET_SHS_WQ_FLOW_STATS_FLOW_INACTIVE,
			       hstats_p->hash, tdiff, 0xDEF, 0xDEF,
			       hstats_p, NULL);
}

void rmnet_shs_wq_update_hash_stats(struct rmnet_shs_wq_hstat_s *hstats_p)
{
	time_t tdiff;
	u64 skb_diff, bytes_diff;
	struct rmnet_shs_skbn_s *node_p;

	node_p = hstats_p->node;

	if (!rmnet_shs_wq_is_hash_rx_new_pkt(hstats_p, node_p)) {
		hstats_p->rx_pps = 0;
		hstats_p->rx_bps = 0;
		rmnet_shs_wq_update_hash_tinactive(hstats_p, node_p);
		rmnet_shs_wq_update_hash_stats_debug(hstats_p, node_p);
		return;
	}

	trace_rmnet_shs_wq_low(RMNET_SHS_WQ_FLOW_STATS,
			       RMNET_SHS_WQ_FLOW_STATS_START,
			       hstats_p->hash, 0xDEF, hstats_p->rx_pps,
			       hstats_p->rx_bps, hstats_p, NULL);

	rmnet_shs_wq_update_hstat_rps_msk(hstats_p);

	hstats_p->inactive_duration = 0;
	hstats_p->l_epoch = node_p->hstats->c_epoch;
	hstats_p->last_rx_skb = node_p->hstats->rx_skb;
	hstats_p->last_rx_bytes = node_p->hstats->rx_bytes;
	hstats_p->c_epoch = rmnet_shs_wq_tnsec;
	hstats_p->rx_skb = node_p->num_skb;
	hstats_p->rx_bytes = node_p->num_skb_bytes;

	tdiff = (hstats_p->c_epoch - hstats_p->l_epoch);
	skb_diff = hstats_p->rx_skb - hstats_p->last_rx_skb;
	bytes_diff = hstats_p->rx_bytes - hstats_p->last_rx_bytes;

	hstats_p->rx_pps = RMNET_SHS_RX_BPNSEC_TO_BPSEC(skb_diff)/(tdiff);
	hstats_p->rx_bps = RMNET_SHS_RX_BPNSEC_TO_BPSEC(bytes_diff)/(tdiff);
	hstats_p->rx_bps = RMNET_SHS_BYTE_TO_BIT(hstats_p->rx_bps);

	rmnet_shs_wq_update_hash_stats_debug(hstats_p, node_p);

	trace_rmnet_shs_wq_high(RMNET_SHS_WQ_FLOW_STATS,
				RMNET_SHS_WQ_FLOW_STATS_END,
				hstats_p->hash, hstats_p->rx_pps,
				hstats_p->rx_bps, (tdiff/1000000),
				hstats_p, NULL);
}
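/* Worked example of the rate math above (illustrative numbers, not taken
 * from a real run): if a flow received skb_diff = 50,000 packets and
 * bytes_diff = 75,000,000 bytes since the last pass, and the passes were
 * tdiff = 1,000,000,000 ns apart, then
 *   rx_pps = (50,000 * 10^9) / 10^9           = 50,000 pkts/sec
 *   rx_bps = ((75,000,000 * 10^9) / 10^9) * 8 = 600,000,000 bits/sec
 * i.e. the BPNSEC_TO_BPSEC() scaling converts a per-nanosecond delta into
 * a per-second rate before the byte-to-bit conversion.
 */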
static void rmnet_shs_wq_refresh_cpu_rates_debug(u16 cpu,
				struct rmnet_shs_wq_cpu_rx_pkt_q_s *cpu_p)
{
	if (!rmnet_shs_stats_enabled)
		return;

	rmnet_shs_cpu_rx_bps[cpu] = cpu_p->rx_bps;
	rmnet_shs_cpu_rx_pps[cpu] = cpu_p->rx_pps;
	rmnet_shs_cpu_rx_flows[cpu] = cpu_p->flows;
	rmnet_shs_cpu_rx_bytes[cpu] = cpu_p->rx_bytes;
	rmnet_shs_cpu_rx_pkts[cpu] = cpu_p->rx_skbs;
}

static void rmnet_shs_wq_refresh_dl_mrkr_stats(void)
{
	struct rmnet_shs_wq_rx_flow_s *tbl_p = &rmnet_shs_rx_flow_tbl;
	struct rmnet_port *port;
	u64 pkt_diff, byte_diff;
	time_t tdiff;

	tbl_p->dl_mrk_last_rx_bytes = tbl_p->dl_mrk_rx_bytes;
	tbl_p->dl_mrk_last_rx_pkts = tbl_p->dl_mrk_rx_pkts;

	port = rmnet_get_port(rmnet_shs_delayed_wq->netdev);
	if (!port) {
		rmnet_shs_crit_err[RMNET_SHS_WQ_GET_RMNET_PORT_ERR]++;
		return;
	}
	tbl_p->dl_mrk_rx_pkts = port->stats.dl_hdr_total_pkts;
	tbl_p->dl_mrk_rx_bytes = port->stats.dl_hdr_total_bytes;

	tdiff = rmnet_shs_wq_tnsec - tbl_p->l_epoch;
	pkt_diff = tbl_p->dl_mrk_rx_pkts - tbl_p->dl_mrk_last_rx_pkts;
	byte_diff = tbl_p->dl_mrk_rx_bytes - tbl_p->dl_mrk_last_rx_bytes;

	tbl_p->dl_mrk_rx_pps = RMNET_SHS_RX_BPNSEC_TO_BPSEC(pkt_diff)/tdiff;
	tbl_p->dl_mrk_rx_bps = RMNET_SHS_RX_BPNSEC_TO_BPSEC(byte_diff)/tdiff;
	tbl_p->dl_mrk_rx_bps = RMNET_SHS_BYTE_TO_BIT(tbl_p->dl_mrk_rx_bps);
}

static void rmnet_shs_wq_refresh_total_stats(void)
{
	struct rmnet_shs_wq_rx_flow_s *tbl_p = &rmnet_shs_rx_flow_tbl;
	u64 pkt_diff, byte_diff, pps, bps;
	time_t tdiff;

	tdiff = rmnet_shs_wq_tnsec - tbl_p->l_epoch;
	pkt_diff = (tbl_p->rx_skbs - tbl_p->last_rx_skbs);
	byte_diff = tbl_p->rx_bytes - tbl_p->last_rx_bytes;

	pps = RMNET_SHS_RX_BPNSEC_TO_BPSEC(pkt_diff)/tdiff;
	bps = RMNET_SHS_RX_BPNSEC_TO_BPSEC(byte_diff)/tdiff;

	tbl_p->last_rx_bps = tbl_p->rx_bps;
	tbl_p->last_rx_pps = tbl_p->rx_pps;
	tbl_p->rx_bps = RMNET_SHS_BYTE_TO_BIT(bps);
	tbl_p->rx_pps = pps;
	tbl_p->l_epoch = rmnet_shs_wq_tnsec;
	tbl_p->last_rx_bytes = tbl_p->rx_bytes;
	tbl_p->last_rx_skbs = tbl_p->rx_skbs;

	trace_rmnet_shs_wq_high(RMNET_SHS_WQ_TOTAL_STATS,
				RMNET_SHS_WQ_TOTAL_STATS_UPDATE,
				tbl_p->rx_pps, tbl_p->dl_mrk_rx_pps,
				tbl_p->rx_bps, tbl_p->dl_mrk_rx_bps,
				NULL, NULL);
}

static void rmnet_shs_wq_refresh_cpu_stats(u16 cpu)
{
	struct rmnet_shs_wq_cpu_rx_pkt_q_s *cpu_p;
	time_t tdiff;
	u64 new_skbs, new_bytes;

	cpu_p = &rmnet_shs_rx_flow_tbl.cpu_list[cpu];
	new_skbs = cpu_p->rx_skbs - cpu_p->last_rx_skbs;

	if (new_skbs == 0) {
		cpu_p->l_epoch = rmnet_shs_wq_tnsec;
		cpu_p->rx_bps = 0;
		cpu_p->rx_pps = 0;
		rmnet_shs_wq_refresh_cpu_rates_debug(cpu, cpu_p);
		return;
	}

	tdiff = rmnet_shs_wq_tnsec - cpu_p->l_epoch;
	new_bytes = cpu_p->rx_bytes - cpu_p->last_rx_bytes;

	cpu_p->last_rx_bps = cpu_p->rx_bps;
	cpu_p->last_rx_pps = cpu_p->rx_pps;
	cpu_p->rx_pps = RMNET_SHS_RX_BPNSEC_TO_BPSEC(new_skbs)/tdiff;
	cpu_p->rx_bps = RMNET_SHS_RX_BPNSEC_TO_BPSEC(new_bytes)/tdiff;
	cpu_p->rx_bps = RMNET_SHS_BYTE_TO_BIT(cpu_p->rx_bps);
	cpu_p->l_epoch = rmnet_shs_wq_tnsec;
	cpu_p->last_rx_skbs = cpu_p->rx_skbs;
	cpu_p->last_rx_bytes = cpu_p->rx_bytes;
	cpu_p->rx_bps_est = cpu_p->rx_bps;

	trace_rmnet_shs_wq_high(RMNET_SHS_WQ_CPU_STATS,
				RMNET_SHS_WQ_CPU_STATS_UPDATE,
				cpu, cpu_p->flows, cpu_p->rx_pps,
				cpu_p->rx_bps, NULL, NULL);

	rmnet_shs_wq_refresh_cpu_rates_debug(cpu, cpu_p);
}

static void rmnet_shs_wq_refresh_all_cpu_stats(void)
{
	u16 cpu;

	trace_rmnet_shs_wq_high(RMNET_SHS_WQ_CPU_STATS,
				RMNET_SHS_WQ_CPU_STATS_START,
				0xDEF, 0xDEF, 0xDEF, 0xDEF, NULL, NULL);

	for (cpu = 0; cpu < MAX_CPUS; cpu++)
		rmnet_shs_wq_refresh_cpu_stats(cpu);

	trace_rmnet_shs_wq_high(RMNET_SHS_WQ_CPU_STATS,
				RMNET_SHS_WQ_CPU_STATS_END,
				0xDEF, 0xDEF, 0xDEF, 0xDEF, NULL, NULL);
}
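/* Example (illustrative) of the per-CPU crediting done by
 * rmnet_shs_wq_update_cpu_rx_tbl() below: if the device rps_map lists
 * cpus {0, 1, 2, 3} and node_p->map_index is 2, the flow is currently
 * being enqueued to CPU 2, so the packet/byte deltas accumulated since
 * the last pass are added to cpu_list[2] (and to the global totals). If
 * the mapped CPU differs from hstat_p->current_cpu, the flow node is also
 * moved to the new CPU's hstat list and the per-CPU flow counts adjusted.
 */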
void rmnet_shs_wq_update_cpu_rx_tbl(struct rmnet_shs_wq_hstat_s *hstat_p)
{
	struct rps_map *map;
	struct rmnet_shs_skbn_s *node_p;
	int cpu_num;
	u16 map_idx;
	u64 skb_diff, byte_diff;
	struct rmnet_shs_wq_rx_flow_s *tbl_p = &rmnet_shs_rx_flow_tbl;

	node_p = hstat_p->node;

	if (hstat_p->inactive_duration > 0)
		return;

	map = rcu_dereference(node_p->dev->_rx->rps_map);
	if (!map)
		return;

	map_idx = node_p->map_index;
	cpu_num = map->cpus[map_idx];

	skb_diff = hstat_p->rx_skb - hstat_p->last_rx_skb;
	byte_diff = hstat_p->rx_bytes - hstat_p->last_rx_bytes;

	if (hstat_p->is_new_flow) {
		rmnet_shs_wq_cpu_list_add(hstat_p,
					  &tbl_p->cpu_list[cpu_num].hstat_id);
		hstat_p->is_new_flow = 0;
	}

	/* Check if the flow has switched to another CPU */
	if (cpu_num != hstat_p->current_cpu) {
		trace_rmnet_shs_wq_high(RMNET_SHS_WQ_FLOW_STATS,
					RMNET_SHS_WQ_FLOW_STATS_UPDATE_NEW_CPU,
					hstat_p->hash, hstat_p->current_cpu,
					cpu_num, 0xDEF, hstat_p, NULL);

		rmnet_shs_wq_cpu_list_move(hstat_p,
					   &tbl_p->cpu_list[cpu_num].hstat_id);

		rmnet_shs_wq_inc_cpu_flow(cpu_num);
		rmnet_shs_wq_dec_cpu_flow(hstat_p->current_cpu);
		hstat_p->current_cpu = cpu_num;
	}

	/* Assume that the data transferred since the last refresh
	 * interval happened on the newer CPU.
	 */
	tbl_p->cpu_list[cpu_num].rx_skbs += skb_diff;
	tbl_p->cpu_list[cpu_num].rx_bytes += byte_diff;

	tbl_p->rx_skbs += skb_diff;
	tbl_p->rx_bytes += byte_diff;
}

static void rmnet_shs_wq_chng_suggested_cpu(u16 old_cpu, u16 new_cpu,
					    struct rmnet_shs_wq_ep_s *ep)
{
	struct rmnet_shs_skbn_s *node_p;
	struct rmnet_shs_wq_hstat_s *hstat_p;
	u16 bkt;

	hash_for_each(RMNET_SHS_HT, bkt, node_p, list) {
		if (!node_p)
			continue;

		if (!node_p->hstats)
			continue;

		hstat_p = node_p->hstats;

		if ((hstat_p->suggested_cpu == old_cpu) &&
		    (node_p->dev == ep->ep->egress_dev)) {
			trace_rmnet_shs_wq_high(RMNET_SHS_WQ_FLOW_STATS,
					RMNET_SHS_WQ_FLOW_STATS_SUGGEST_NEW_CPU,
					hstat_p->hash, hstat_p->suggested_cpu,
					new_cpu, 0xDEF, hstat_p, NULL);

			node_p->hstats->suggested_cpu = new_cpu;
		}
	}
}

u64 rmnet_shs_wq_get_max_pps_among_cores(u32 core_msk)
{
	int cpu_num;
	u64 max_pps = 0;
	struct rmnet_shs_wq_rx_flow_s *rx_flow_tbl_p = &rmnet_shs_rx_flow_tbl;

	for (cpu_num = 0; cpu_num < MAX_CPUS; cpu_num++) {
		if (((1 << cpu_num) & core_msk) &&
		    (rx_flow_tbl_p->cpu_list[cpu_num].rx_pps > max_pps)) {
			max_pps = rx_flow_tbl_p->cpu_list[cpu_num].rx_pps;
		}
	}

	return max_pps;
}

u32 rmnet_shs_wq_get_dev_rps_msk(struct net_device *dev)
{
	u32 dev_rps_msk = 0;
	struct rmnet_shs_wq_ep_s *ep;

	list_for_each_entry(ep, &rmnet_shs_wq_ep_tbl, ep_list_id) {
		if (!ep)
			continue;

		if (!ep->is_ep_active)
			continue;

		if (ep->ep->egress_dev == dev)
			dev_rps_msk = ep->rps_config_msk;
	}

	return dev_rps_msk;
}

/* Return the least utilized core from the list of cores available.
 * If all the cores are fully utilized, return no specific core.
 */
int rmnet_shs_wq_get_least_utilized_core(u16 core_msk)
{
	int cpu_num;
	struct rmnet_shs_wq_rx_flow_s *rx_flow_tbl_p = &rmnet_shs_rx_flow_tbl;
	struct rmnet_shs_wq_cpu_rx_pkt_q_s *list_p;
	u64 min_pps = rmnet_shs_wq_get_max_pps_among_cores(core_msk);
	u64 max_pps = 0;
	int ret_val = -1;
	u8 is_cpu_in_msk;

	for (cpu_num = 0; cpu_num < MAX_CPUS; cpu_num++) {
		is_cpu_in_msk = (1 << cpu_num) & core_msk;
		if (!is_cpu_in_msk)
			continue;

		list_p = &rx_flow_tbl_p->cpu_list[cpu_num];
		max_pps = rmnet_shs_wq_get_max_allowed_pps(cpu_num);

		trace_rmnet_shs_wq_low(RMNET_SHS_WQ_CPU_STATS,
				       RMNET_SHS_WQ_CPU_STATS_CURRENT_UTIL,
				       cpu_num, list_p->rx_pps, min_pps,
				       max_pps, NULL, NULL);

		/* Skip a core that is already heavily loaded */
		if (list_p->rx_pps > max_pps)
			continue;

		/* When there are multiple free CPUs the first free CPU will
		 * be returned
		 */
		if (list_p->rx_pps == 0) {
			ret_val = cpu_num;
			break;
		}

		/* Found a core that is processing even fewer packets */
		if (list_p->rx_pps <= min_pps) {
			min_pps = list_p->rx_pps;
			ret_val = cpu_num;
		}
	}

	return ret_val;
}

u16 rmnet_shs_wq_find_cpu_to_move_flows(u16 current_cpu,
					struct rmnet_shs_wq_ep_s *ep)
{
	struct rmnet_shs_wq_rx_flow_s *rx_flow_tbl_p = &rmnet_shs_rx_flow_tbl;
	struct rmnet_shs_wq_cpu_rx_pkt_q_s *cpu_list_p, *cur_cpu_list_p;
	u64 cpu_rx_pps, reqd_pps, cur_cpu_rx_pps;
	u64 pps_uthresh, pps_lthresh = 0;
	u16 cpu_to_move = current_cpu;
	u16 cpu_num;
	u8 is_core_in_msk;

	if (!ep) {
		rmnet_shs_crit_err[RMNET_SHS_WQ_EP_ACCESS_ERR]++;
		return cpu_to_move;
	}

	cur_cpu_list_p = &rx_flow_tbl_p->cpu_list[current_cpu];
	cur_cpu_rx_pps = cur_cpu_list_p->rx_pps;
	pps_uthresh = rmnet_shs_cpu_rx_max_pps_thresh[current_cpu];

	/* If we are already on a perf core and the required pps is beyond
	 * the capacity that even the perf cores can provide, there is not
	 * much we can do. So we will continue to let the flows process
	 * packets on the same perf core.
	 */
	if (!rmnet_shs_is_lpwr_cpu(current_cpu) &&
	    (cur_cpu_rx_pps > pps_uthresh)) {
		return cpu_to_move;
	}

	for (cpu_num = 0; cpu_num < MAX_CPUS; cpu_num++) {
		is_core_in_msk = ((1 << cpu_num) & ep->rps_config_msk);

		/* We are looking for a core that is configured and that
		 * can handle traffic better than the current core
		 */
		if ((cpu_num == current_cpu) || (!is_core_in_msk))
			continue;

		pps_uthresh = rmnet_shs_cpu_rx_max_pps_thresh[cpu_num];
		pps_lthresh = rmnet_shs_cpu_rx_min_pps_thresh[cpu_num];

		cpu_list_p = &rx_flow_tbl_p->cpu_list[cpu_num];
		cpu_rx_pps = cpu_list_p->rx_pps;
		reqd_pps = cpu_rx_pps + cur_cpu_rx_pps;

		trace_rmnet_shs_wq_low(RMNET_SHS_WQ_CPU_STATS,
				       RMNET_SHS_WQ_CPU_STATS_CORE2SWITCH_FIND,
				       current_cpu, cpu_num, reqd_pps,
				       cpu_rx_pps, NULL, NULL);

		/* Return the first available CPU */
		if ((reqd_pps > pps_lthresh) && (reqd_pps < pps_uthresh)) {
			cpu_to_move = cpu_num;
			break;
		}
	}

	trace_rmnet_shs_wq_high(RMNET_SHS_WQ_CPU_STATS,
				RMNET_SHS_WQ_CPU_STATS_CORE2SWITCH_FIND,
				current_cpu, cpu_to_move, cur_cpu_rx_pps,
				rx_flow_tbl_p->cpu_list[cpu_to_move].rx_pps,
				NULL, NULL);

	return cpu_to_move;
}

void rmnet_shs_wq_find_cpu_and_move_flows(u16 cur_cpu)
{
	struct rmnet_shs_wq_ep_s *ep;
	u16 new_cpu;

	list_for_each_entry(ep, &rmnet_shs_wq_ep_tbl, ep_list_id) {
		if (!ep)
			continue;

		if (!ep->is_ep_active)
			continue;

		new_cpu = rmnet_shs_wq_find_cpu_to_move_flows(cur_cpu, ep);

		if (new_cpu != cur_cpu)
			rmnet_shs_wq_chng_suggested_cpu(cur_cpu, new_cpu, ep);
	}
}
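/* Worked example of the weighted averaging used by
 * rmnet_shs_wq_eval_suggested_cpu() below (illustrative numbers). With the
 * default rmnet_shs_wq_tuning of 80, a low-power core weights the newest
 * sample at 80 and the history at 20. If cpu_curr_pps = 100,000,
 * cpu_last_pps = 40,000 and last_avg_pps = 20,000, then
 *   moving_avg_pps = (40,000 + 20,000) / 2 = 30,000
 *   avg_pps = (80 * 100,000 + 20 * 30,000) / 100 = 86,000
 * so a traffic burst quickly pushes the average over the upper threshold.
 * On a perf core the weights are swapped (20/80), giving 44,000 for the
 * same samples, so a momentary dip is much slower to trigger a move back
 * to a low-power core.
 */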
void rmnet_shs_wq_eval_suggested_cpu(void)
{
	struct rmnet_shs_wq_rx_flow_s *rx_flow_tbl_p = &rmnet_shs_rx_flow_tbl;
	struct rmnet_shs_wq_cpu_rx_pkt_q_s *cpu_list_p;
	u64 cpu_curr_pps, cpu_last_pps, last_avg_pps;
	u64 moving_avg_pps, avg_pps;
	u64 pps_uthresh, pps_lthresh = 0;
	u16 cpu_num, new_weight, old_weight;
	int flows;

	for (cpu_num = 0; cpu_num < MAX_CPUS; cpu_num++) {
		flows = rx_flow_tbl_p->cpu_list[cpu_num].flows;

		/* Nothing to evaluate if there is no traffic on this cpu */
		if (flows <= 0)
			continue;

		cpu_list_p = &rx_flow_tbl_p->cpu_list[cpu_num];
		cpu_curr_pps = cpu_list_p->rx_pps;
		cpu_last_pps = cpu_list_p->last_rx_pps;
		last_avg_pps = cpu_list_p->avg_pps;
		pps_uthresh = rmnet_shs_cpu_rx_max_pps_thresh[cpu_num];
		pps_lthresh = rmnet_shs_cpu_rx_min_pps_thresh[cpu_num];

		/* Often when we decide to switch from a small cluster core,
		 * it is because of the heavy traffic on that core. In such
		 * circumstances, we want to switch to a big cluster
		 * core as soon as possible. Therefore, we will provide a
		 * greater weightage to the most recent sample compared to
		 * the previous samples.
		 *
		 * On the other hand, when a flow which is on a big cluster
		 * cpu suddenly starts to receive low traffic we move to a
		 * small cluster core after observing low traffic for some
		 * more samples. This approach avoids switching back and forth
		 * to small cluster cpus due to momentary decrease in data
		 * traffic.
		 */
		if (rmnet_shs_is_lpwr_cpu(cpu_num)) {
			new_weight = rmnet_shs_wq_tuning;
			old_weight = 100 - rmnet_shs_wq_tuning;
		} else {
			old_weight = rmnet_shs_wq_tuning;
			new_weight = 100 - rmnet_shs_wq_tuning;
		}

		/* Compute the weighted average */
		moving_avg_pps = (cpu_last_pps + last_avg_pps) / 2;
		avg_pps = ((new_weight * cpu_curr_pps) +
			   (old_weight * moving_avg_pps)) /
			   (new_weight + old_weight);
		cpu_list_p->avg_pps = avg_pps;

		trace_rmnet_shs_wq_high(RMNET_SHS_WQ_CPU_STATS,
				RMNET_SHS_WQ_CPU_STATS_CORE2SWITCH_EVAL_CPU,
				cpu_num, cpu_curr_pps, cpu_last_pps,
				avg_pps, NULL, NULL);

		if ((avg_pps > pps_uthresh) ||
		    ((avg_pps < pps_lthresh) &&
		     (cpu_curr_pps < pps_lthresh)))
			rmnet_shs_wq_find_cpu_and_move_flows(cpu_num);
	}
}

void rmnet_shs_wq_refresh_new_flow_list_per_ep(struct rmnet_shs_wq_ep_s *ep)
{
	int lo_core;
	int hi_core;
	u16 rps_msk;
	u16 lo_msk;
	u16 hi_msk;
	u8 lo_core_idx = 0;
	u8 hi_core_idx = 0;

	if (!ep) {
		rmnet_shs_crit_err[RMNET_SHS_WQ_EP_ACCESS_ERR]++;
		return;
	}

	rps_msk = ep->rps_config_msk;
	lo_msk = ep->default_core_msk;
	hi_msk = ep->pri_core_msk;

	memset(ep->new_lo_core, -1, sizeof(*ep->new_lo_core) * MAX_CPUS);
	memset(ep->new_hi_core, -1, sizeof(*ep->new_hi_core) * MAX_CPUS);

	do {
		lo_core = rmnet_shs_wq_get_least_utilized_core(lo_msk);
		if (lo_core >= 0) {
			ep->new_lo_core[lo_core_idx] = lo_core;
			lo_msk = lo_msk & ~(1 << lo_core);
			lo_core_idx++;
		} else {
			break;
		}
	} while (lo_msk != 0);

	trace_rmnet_shs_wq_low(RMNET_SHS_WQ_CPU_STATS,
			       RMNET_SHS_WQ_CPU_STATS_NEW_FLOW_LIST_LO,
			       ep->new_lo_core[0], ep->new_lo_core[1],
			       ep->new_lo_core[2], ep->new_lo_max, ep, NULL);
	do {
		hi_core = rmnet_shs_wq_get_least_utilized_core(hi_msk);
		if (hi_core >= 0) {
			ep->new_hi_core[hi_core_idx] = hi_core;
			hi_msk = hi_msk & ~(1 << hi_core);
			hi_core_idx++;
		} else
			break;
	} while (hi_msk != 0);

	ep->new_lo_max = lo_core_idx;
	ep->new_hi_max = hi_core_idx;
	ep->new_lo_idx = 0;
	ep->new_hi_idx = 0;

	trace_rmnet_shs_wq_low(RMNET_SHS_WQ_CPU_STATS,
			       RMNET_SHS_WQ_CPU_STATS_NEW_FLOW_LIST_HI,
			       ep->new_hi_core[0], ep->new_hi_core[1],
			       ep->new_hi_core[2], ep->new_hi_max, ep, NULL);
	return;
}

void rmnet_shs_wq_refresh_new_flow_list(void)
{
	struct rmnet_shs_wq_ep_s *ep;

	list_for_each_entry(ep, &rmnet_shs_wq_ep_tbl, ep_list_id) {
		if (!ep)
			continue;

		if (!ep->is_ep_active)
			continue;

		rmnet_shs_wq_refresh_new_flow_list_per_ep(ep);
	}
}

/* Return an invalid core if only the priority core is available */
int rmnet_shs_wq_get_lpwr_cpu_new_flow(struct net_device *dev)
{
	u8 lo_idx;
	u8 lo_max;
	int cpu_assigned = -1;
	u8 is_match_found = 0;
	struct rmnet_shs_wq_ep_s *ep;

	if (!dev) {
		rmnet_shs_crit_err[RMNET_SHS_NETDEV_ERR]++;
		return cpu_assigned;
	}

	list_for_each_entry(ep, &rmnet_shs_wq_ep_tbl, ep_list_id) {
		if (!ep)
			continue;

		if (!ep->is_ep_active)
			continue;

		if (ep->ep->egress_dev == dev) {
			is_match_found = 1;
			break;
		}
	}

	if (!is_match_found) {
		rmnet_shs_crit_err[RMNET_SHS_WQ_EP_ACCESS_ERR]++;
		return cpu_assigned;
	}

	lo_idx = ep->new_lo_idx;
	lo_max = ep->new_lo_max;

	while (lo_idx < lo_max) {
		if (ep->new_lo_core[lo_idx] >= 0) {
			cpu_assigned = ep->new_lo_core[lo_idx];
			break;
		}
		lo_idx++;
	}

	/* Increment CPU assignment idx to be ready for next flow assignment */
	if ((cpu_assigned >= 0) ||
	    ((ep->new_lo_idx + 1) >= ep->new_lo_max))
		ep->new_lo_idx = ((ep->new_lo_idx + 1) % ep->new_lo_max);
	return cpu_assigned;
}

int rmnet_shs_wq_get_perf_cpu_new_flow(struct net_device *dev)
{
	struct rmnet_shs_wq_ep_s *ep;
	int cpu_assigned = -1;
	u8 hi_idx;
	u8 hi_max;
	u8 is_match_found = 0;

	if (!dev) {
		rmnet_shs_crit_err[RMNET_SHS_NETDEV_ERR]++;
		return cpu_assigned;
	}

	list_for_each_entry(ep, &rmnet_shs_wq_ep_tbl, ep_list_id) {
		if (!ep)
			continue;

		if (!ep->is_ep_active)
			continue;

		if (ep->ep->egress_dev == dev) {
			is_match_found = 1;
			break;
		}
	}

	if (!is_match_found) {
		rmnet_shs_crit_err[RMNET_SHS_WQ_EP_ACCESS_ERR]++;
		return cpu_assigned;
	}

	hi_idx = ep->new_hi_idx;
	hi_max = ep->new_hi_max;

	while (hi_idx < hi_max) {
		if (ep->new_hi_core[hi_idx] >= 0) {
			cpu_assigned = ep->new_hi_core[hi_idx];
			break;
		}
		hi_idx++;
	}

	/* Increment CPU assignment idx to be ready for next flow assignment */
	if (cpu_assigned >= 0)
		ep->new_hi_idx = ((hi_idx + 1) % hi_max);

	return cpu_assigned;
}

void rmnet_shs_wq_cleanup_hash_tbl(u8 force_clean)
{
	struct rmnet_shs_skbn_s *node_p;
	time_t tns2s;
	unsigned long ht_flags;
	struct rmnet_shs_wq_hstat_s *hnode = NULL;
	struct list_head *ptr, *next;

	list_for_each_safe(ptr, next, &rmnet_shs_wq_hstat_tbl) {
		hnode = list_entry(ptr, struct rmnet_shs_wq_hstat_s,
				   hstat_node_id);
		if (hnode == NULL)
			continue;

		if (hnode->node == NULL)
			continue;

		node_p = hnode->node;
		tns2s = RMNET_SHS_NSEC_TO_SEC(hnode->inactive_duration);

		if (tns2s > rmnet_shs_max_flow_inactivity_sec || force_clean) {
			trace_rmnet_shs_wq_low(RMNET_SHS_WQ_FLOW_STATS,
				RMNET_SHS_WQ_FLOW_STATS_FLOW_INACTIVE_TIMEOUT,
				node_p->hash, tns2s, 0xDEF, 0xDEF,
				node_p, hnode);

			spin_lock_irqsave(&rmnet_shs_ht_splock, ht_flags);
			rmnet_shs_wq_dec_cpu_flow(hnode->current_cpu);
			if (node_p) {
				rmnet_shs_cpu_node_remove(node_p);
				hash_del_rcu(&node_p->list);
				kfree(node_p);
			}
			rmnet_shs_wq_cpu_list_remove(hnode);
			if (hnode->is_perm == 0 || force_clean) {
				rmnet_shs_wq_hstat_tbl_remove(hnode);
				kfree(hnode);
			} else {
				rmnet_shs_wq_hstat_reset_node(hnode);
			}
			spin_unlock_irqrestore(&rmnet_shs_ht_splock, ht_flags);
		}
	}
}

void rmnet_shs_wq_update_ep_rps_msk(struct rmnet_shs_wq_ep_s *ep)
{
	u8 len = 0;
	struct rps_map *map;

	if (!ep) {
		rmnet_shs_crit_err[RMNET_SHS_WQ_EP_ACCESS_ERR]++;
		return;
	}

	map = rcu_dereference(ep->ep->egress_dev->_rx->rps_map);
	ep->rps_config_msk = 0;
	if (map != NULL) {
		for (len = 0; len < map->len; len++)
			ep->rps_config_msk |= (1 << map->cpus[len]);
	}
	ep->default_core_msk = ep->rps_config_msk & 0x0F;
	ep->pri_core_msk = ep->rps_config_msk & 0xF0;
}

void rmnet_shs_wq_reset_ep_active(struct net_device *dev)
{
	struct rmnet_shs_wq_ep_s *ep;

	list_for_each_entry(ep, &rmnet_shs_wq_ep_tbl, ep_list_id) {
		if (!ep)
			continue;

		if (ep->ep->egress_dev == dev)
			ep->is_ep_active = 0;
	}
}

void rmnet_shs_wq_set_ep_active(struct net_device *dev)
{
	struct rmnet_shs_wq_ep_s *ep;

	list_for_each_entry(ep, &rmnet_shs_wq_ep_tbl, ep_list_id) {
		if (!ep)
			continue;

		if (ep->ep->egress_dev == dev)
			ep->is_ep_active = 1;
	}
}

void rmnet_shs_wq_refresh_ep_masks(void)
{
	struct rmnet_shs_wq_ep_s *ep;

	list_for_each_entry(ep, &rmnet_shs_wq_ep_tbl, ep_list_id) {
		if (!ep)
			continue;

		if (!ep->is_ep_active)
			continue;

		rmnet_shs_wq_update_ep_rps_msk(ep);
	}
}
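/* One pass of the periodic workqueue, as performed by
 * rmnet_shs_wq_update_stats() below: take a fresh timestamp, refresh the
 * per-endpoint RPS masks, walk every in-use hstat node to update per-flow
 * and per-CPU counters, recompute per-CPU / total / DL-marker rates,
 * re-evaluate the suggested CPU for loaded cores, rebuild the preferred
 * core lists for new flows, and finally do a PERIODIC_CLEAN of inactive
 * flows.
 */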
static void rmnet_shs_wq_update_stats(void)
{
	struct timespec time;
	struct rmnet_shs_wq_hstat_s *hnode;

	(void)getnstimeofday(&time);
	rmnet_shs_wq_tnsec = RMNET_SHS_SEC_TO_NSEC(time.tv_sec) + time.tv_nsec;

	rmnet_shs_wq_refresh_ep_masks();

	list_for_each_entry(hnode, &rmnet_shs_wq_hstat_tbl, hstat_node_id) {
		if (!hnode)
			continue;

		if (hnode->in_use == 0)
			continue;

		if (hnode->node) {
			rmnet_shs_wq_update_hash_stats(hnode);
			rmnet_shs_wq_update_cpu_rx_tbl(hnode);
		}
	}

	rmnet_shs_wq_refresh_all_cpu_stats();
	rmnet_shs_wq_refresh_total_stats();
	rmnet_shs_wq_refresh_dl_mrkr_stats();
	rmnet_shs_wq_eval_suggested_cpu();
	rmnet_shs_wq_refresh_new_flow_list();

	/* Invoke after both the locks are released */
	rmnet_shs_wq_cleanup_hash_tbl(PERIODIC_CLEAN);
}

void rmnet_shs_wq_process_wq(struct work_struct *work)
{
	trace_rmnet_shs_wq_high(RMNET_SHS_WQ_PROCESS_WQ,
				RMNET_SHS_WQ_PROCESS_WQ_START,
				0xDEF, 0xDEF, 0xDEF, 0xDEF, NULL, NULL);

	rmnet_shs_wq_update_stats();
	queue_delayed_work(rmnet_shs_wq, &rmnet_shs_delayed_wq->wq,
			   rmnet_shs_wq_frequency);

	trace_rmnet_shs_wq_high(RMNET_SHS_WQ_PROCESS_WQ,
				RMNET_SHS_WQ_PROCESS_WQ_END,
				0xDEF, 0xDEF, 0xDEF, 0xDEF, NULL, NULL);
}

void rmnet_shs_wq_clean_ep_tbl(void)
{
	struct rmnet_shs_wq_ep_s *ep;
	struct list_head *ptr, *next;

	list_for_each_safe(ptr, next, &rmnet_shs_wq_ep_tbl) {
		ep = list_entry(ptr, struct rmnet_shs_wq_ep_s, ep_list_id);
		if (!ep)
			continue;

		trace_rmnet_shs_wq_high(RMNET_SHS_WQ_EP_TBL,
					RMNET_SHS_WQ_EP_TBL_CLEANUP,
					0xDEF, 0xDEF, 0xDEF, 0xDEF, ep, NULL);
		rmnet_shs_wq_ep_tbl_remove(ep);
		kfree(ep);
	}
}

void rmnet_shs_wq_exit(void)
{
	/* If the Wq is not initialized, there is nothing to clean up */
	if (!rmnet_shs_wq || !rmnet_shs_delayed_wq)
		return;

	trace_rmnet_shs_wq_high(RMNET_SHS_WQ_EXIT, RMNET_SHS_WQ_EXIT_START,
				0xDEF, 0xDEF, 0xDEF, 0xDEF, NULL, NULL);

	cancel_delayed_work_sync(&rmnet_shs_delayed_wq->wq);

	drain_workqueue(rmnet_shs_wq);
	destroy_workqueue(rmnet_shs_wq);
	kfree(rmnet_shs_delayed_wq);
	rmnet_shs_delayed_wq = NULL;
	rmnet_shs_wq = NULL;
	rmnet_shs_wq_cleanup_hash_tbl(FORCE_CLEAN);
	rmnet_shs_wq_clean_ep_tbl();

	trace_rmnet_shs_wq_high(RMNET_SHS_WQ_EXIT, RMNET_SHS_WQ_EXIT_END,
				0xDEF, 0xDEF, 0xDEF, 0xDEF, NULL, NULL);
}

void rmnet_shs_wq_gather_rmnet_ep(struct net_device *dev)
{
	u8 mux_id;
	struct rmnet_port *port;
	struct rmnet_endpoint *ep;
	struct rmnet_shs_wq_ep_s *ep_wq;

	port = rmnet_get_port(dev);

	for (mux_id = 1; mux_id < 255; mux_id++) {
		ep = rmnet_get_endpoint(port, mux_id);
		if (!ep)
			continue;

		trace_rmnet_shs_wq_high(RMNET_SHS_WQ_EP_TBL,
					RMNET_SHS_WQ_EP_TBL_INIT,
					0xDEF, 0xDEF, 0xDEF, 0xDEF, ep, NULL);

		ep_wq = kzalloc(sizeof(*ep_wq), 0);
		if (!ep_wq) {
			rmnet_shs_crit_err[RMNET_SHS_WQ_ALLOC_EP_TBL_ERR]++;
			return;
		}
		INIT_LIST_HEAD(&ep_wq->ep_list_id);
		ep_wq->ep = ep;
		rmnet_shs_wq_update_ep_rps_msk(ep_wq);
		rmnet_shs_wq_ep_tbl_add(ep_wq);
	}
}

void rmnet_shs_wq_init_cpu_rx_flow_tbl(void)
{
	u8 cpu_num;
	struct rmnet_shs_wq_cpu_rx_pkt_q_s *rx_flow_tbl_p;

	for (cpu_num = 0; cpu_num < MAX_CPUS; cpu_num++) {
		trace_rmnet_shs_wq_high(RMNET_SHS_WQ_CPU_HSTAT_TBL,
					RMNET_SHS_WQ_CPU_HSTAT_TBL_INIT,
					cpu_num, 0xDEF, 0xDEF, 0xDEF,
					NULL, NULL);
		rx_flow_tbl_p = &rmnet_shs_rx_flow_tbl.cpu_list[cpu_num];
		INIT_LIST_HEAD(&rx_flow_tbl_p->hstat_id);
	}
}

void rmnet_shs_wq_pause(void)
{
	if (rmnet_shs_wq && rmnet_shs_delayed_wq)
		cancel_delayed_work_sync(&rmnet_shs_delayed_wq->wq);
}

void rmnet_shs_wq_restart(void)
{
	if (rmnet_shs_wq && rmnet_shs_delayed_wq)
		queue_delayed_work(rmnet_shs_wq,
				   &rmnet_shs_delayed_wq->wq, 0);
}

void rmnet_shs_wq_init(struct net_device *dev)
{
	/* If the workqueue is already initialized we should not be
	 * initializing again
	 */
	if (rmnet_shs_wq)
		return;

	trace_rmnet_shs_wq_high(RMNET_SHS_WQ_INIT, RMNET_SHS_WQ_INIT_START,
				0xDEF, 0xDEF, 0xDEF, 0xDEF, NULL, NULL);
	spin_lock_init(&rmnet_shs_wq_splock);

	rmnet_shs_wq = alloc_workqueue("rmnet_shs_wq",
				       WQ_MEM_RECLAIM | WQ_CPU_INTENSIVE, 1);
	if (!rmnet_shs_wq) {
		rmnet_shs_crit_err[RMNET_SHS_WQ_ALLOC_WQ_ERR]++;
		return;
	}
	rmnet_shs_delayed_wq = kmalloc(sizeof(struct rmnet_shs_delay_wq_s),
				       GFP_ATOMIC);
	if (!rmnet_shs_delayed_wq) {
		rmnet_shs_crit_err[RMNET_SHS_WQ_ALLOC_DEL_WQ_ERR]++;
		rmnet_shs_wq_exit();
		return;
	}

	rmnet_shs_delayed_wq->netdev = dev;
	rmnet_shs_wq_gather_rmnet_ep(dev);

	/* All hstat nodes allocated during Wq init will be held forever */
	rmnet_shs_wq_hstat_alloc_nodes(RMNET_SHS_MIN_HSTAT_NODES_REQD, 1);
	rmnet_shs_wq_init_cpu_rx_flow_tbl();
	INIT_DEFERRABLE_WORK(&rmnet_shs_delayed_wq->wq,
			     rmnet_shs_wq_process_wq);

	/* During initialization, we can start the workqueue without a delay
	 * to initialize all meta data and pre-allocated memory
	 * for hash stats, if required
	 */
	queue_delayed_work(rmnet_shs_wq, &rmnet_shs_delayed_wq->wq, 0);

	trace_rmnet_shs_wq_high(RMNET_SHS_WQ_INIT, RMNET_SHS_WQ_INIT_END,
				0xDEF, 0xDEF, 0xDEF, 0xDEF, NULL, NULL);
}

int rmnet_shs_wq_get_num_cpu_flows(u16 cpu)
{
	int flows = -1;

	if (cpu >= MAX_CPUS) {
		rmnet_shs_crit_err[RMNET_SHS_INVALID_CPU_ERR]++;
		return flows;
	}

	flows = rmnet_shs_rx_flow_tbl.cpu_list[cpu].flows;
	trace_rmnet_shs_wq_low(RMNET_SHS_WQ_CPU_STATS,
			       RMNET_SHS_WQ_CPU_STATS_GET_CPU_FLOW,
			       cpu, flows, 0xDEF, 0xDEF, NULL, NULL);
	return flows;
}

int rmnet_shs_wq_get_max_flows_per_core(void)
{
	u16 cpu;
	int max_flows = -1;
	int cpu_flows;

	for (cpu = 0; cpu < MAX_CPUS; cpu++) {
		cpu_flows = rmnet_shs_wq_get_num_cpu_flows(cpu);
		if (cpu_flows > max_flows)
			max_flows = cpu_flows;

		trace_rmnet_shs_wq_low(RMNET_SHS_WQ_CPU_STATS,
				       RMNET_SHS_WQ_CPU_STATS_GET_MAX_CPU_FLOW,
				       cpu, cpu_flows, max_flows, 0xDEF,
				       NULL, NULL);
	}

	return max_flows;
}

int rmnet_shs_wq_get_max_flows_per_cluster(u16 cpu)
{
	u32 big_cluster_mask = 1 << 4;
	u32 core_mask = 1;
	u16 start_core = 0;
	u16 end_core = 4;
	int max_flows = -1;
	int cpu_flows;

	if (cpu > MAX_CPUS) {
		rmnet_shs_crit_err[RMNET_SHS_INVALID_CPU_ERR]++;
		return max_flows;
	}

	core_mask <<= cpu;
	if (core_mask >= big_cluster_mask) {
		start_core = 4;
		end_core = MAX_CPUS;
	}

	for (; start_core < end_core; start_core++) {
		cpu_flows = rmnet_shs_wq_get_num_cpu_flows(start_core);
		if (cpu_flows > max_flows)
			max_flows = cpu_flows;
	}

	trace_rmnet_shs_wq_low(RMNET_SHS_WQ_CPU_STATS,
			       RMNET_SHS_WQ_CPU_STATS_MAX_FLOW_IN_CLUSTER,
			       start_core, end_core, cpu, max_flows,
			       NULL, NULL);
	return max_flows;
}

void rmnet_shs_wq_inc_cpu_flow(u16 cpu)
{
	rmnet_shs_rx_flow_tbl.cpu_list[cpu].flows++;

	trace_rmnet_shs_wq_low(RMNET_SHS_WQ_CPU_STATS,
			       RMNET_SHS_WQ_CPU_STATS_INC_CPU_FLOW,
			       cpu, rmnet_shs_rx_flow_tbl.cpu_list[cpu].flows,
			       0xDEF, 0xDEF, NULL, NULL);
}

void rmnet_shs_wq_dec_cpu_flow(u16 cpu)
{
	if (rmnet_shs_rx_flow_tbl.cpu_list[cpu].flows > 0)
		rmnet_shs_rx_flow_tbl.cpu_list[cpu].flows--;

	trace_rmnet_shs_wq_low(RMNET_SHS_WQ_CPU_STATS,
			       RMNET_SHS_WQ_CPU_STATS_DEC_CPU_FLOW,
			       cpu, rmnet_shs_rx_flow_tbl.cpu_list[cpu].flows,
			       0xDEF, 0xDEF, NULL, NULL);
}

u64 rmnet_shs_wq_get_max_allowed_pps(u16 cpu)
{
	return rmnet_shs_cpu_rx_max_pps_thresh[cpu];
}
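/* Typical lifecycle of this workqueue, for reference: rmnet_shs_wq_init()
 * allocates the workqueue, gathers the rmnet endpoints, pre-allocates the
 * permanent hstat nodes and queues the first pass immediately; each pass
 * (rmnet_shs_wq_process_wq) re-queues itself every rmnet_shs_wq_frequency
 * ticks; rmnet_shs_wq_pause()/rmnet_shs_wq_restart() cancel and re-queue
 * the delayed work; rmnet_shs_wq_exit() drains and destroys the workqueue
 * and force-cleans the hash and endpoint tables.
 */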