Newer
Older
From: Felix Fietkau <nbd@nbd.name>
Date: Fri, 9 Feb 2024 20:47:39 +0100
Subject: mac80211: add AQL support for broadcast/multicast packets
This should improve performance and reliability when there are lots of multicast packets.
Signed-off-by: Felix Fietkau <nbd@nbd.name>
(cherry picked from commit 95e633efbd1b4ffbbfc2d8abba2b05291f6e9903)
diff --git a/package/kernel/mac80211/patches/subsys/330-mac80211-add-AQL-support-for-broadcast-packets.patch b/package/kernel/mac80211/patches/subsys/330-mac80211-add-AQL-support-for-broadcast-packets.patch
new file mode 100644
index 0000000000000000000000000000000000000000..5f6754e5024f90f7ba6833c3702fe3ce425c50bb
--- /dev/null
+++ b/package/kernel/mac80211/patches/subsys/330-mac80211-add-AQL-support-for-broadcast-packets.patch
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
+From: Felix Fietkau <nbd@nbd.name>
+Date: Fri, 9 Feb 2024 19:43:40 +0100
+Subject: [PATCH] mac80211: add AQL support for broadcast packets
+
+Excessive broadcast traffic with little competing unicast traffic can easily
+flood hardware queues, leading to throughput issues. Additionally, filling
+the hardware queues with too many packets breaks FQ for broadcast data.
+Fix this by enabling AQL for broadcast packets.
+
+Signed-off-by: Felix Fietkau <nbd@nbd.name>
+---
+
+--- a/include/net/cfg80211.h
++++ b/include/net/cfg80211.h
+@@ -3158,6 +3158,7 @@ enum wiphy_params_flags {
+ /* The per TXQ device queue limit in airtime */
+ #define IEEE80211_DEFAULT_AQL_TXQ_LIMIT_L 5000
+ #define IEEE80211_DEFAULT_AQL_TXQ_LIMIT_H 12000
++#define IEEE80211_DEFAULT_AQL_TXQ_LIMIT_BC 50000
+
+ /* The per interface airtime threshold to switch to lower queue limit */
+ #define IEEE80211_AQL_THRESHOLD 24000
+--- a/net/mac80211/debugfs.c
++++ b/net/mac80211/debugfs.c
+@@ -215,11 +215,13 @@ static ssize_t aql_pending_read(struct f
+ "VI %u us\n"
+ "BE %u us\n"
+ "BK %u us\n"
++ "BC/MC %u us\n"
+ "total %u us\n",
+ atomic_read(&local->aql_ac_pending_airtime[IEEE80211_AC_VO]),
+ atomic_read(&local->aql_ac_pending_airtime[IEEE80211_AC_VI]),
+ atomic_read(&local->aql_ac_pending_airtime[IEEE80211_AC_BE]),
+ atomic_read(&local->aql_ac_pending_airtime[IEEE80211_AC_BK]),
++ atomic_read(&local->aql_bc_pending_airtime),
+ atomic_read(&local->aql_total_pending_airtime));
+ return simple_read_from_buffer(user_buf, count, ppos,
+ buf, len);
+@@ -245,7 +247,8 @@ static ssize_t aql_txq_limit_read(struct
+ "VO %u %u\n"
+ "VI %u %u\n"
+ "BE %u %u\n"
+- "BK %u %u\n",
++ "BK %u %u\n"
++ "BC/MC %u\n",
+ local->aql_txq_limit_low[IEEE80211_AC_VO],
+ local->aql_txq_limit_high[IEEE80211_AC_VO],
+ local->aql_txq_limit_low[IEEE80211_AC_VI],
+@@ -253,7 +256,8 @@ static ssize_t aql_txq_limit_read(struct
+ local->aql_txq_limit_low[IEEE80211_AC_BE],
+ local->aql_txq_limit_high[IEEE80211_AC_BE],
+ local->aql_txq_limit_low[IEEE80211_AC_BK],
+- local->aql_txq_limit_high[IEEE80211_AC_BK]);
++ local->aql_txq_limit_high[IEEE80211_AC_BK],
++ local->aql_txq_limit_bc);
+ return simple_read_from_buffer(user_buf, count, ppos,
+ buf, len);
+ }
+@@ -279,6 +283,11 @@ static ssize_t aql_txq_limit_write(struc
+ else
+ buf[count] = '\0';
+
++ if (sscanf(buf, "mcast %u", &q_limit_low) == 1) {
++ local->aql_txq_limit_bc = q_limit_low;
++ return count;
++ }
++
+ if (sscanf(buf, "%u %u %u", &ac, &q_limit_low, &q_limit_high) != 3)
+ return -EINVAL;
+
+--- a/net/mac80211/ieee80211_i.h
++++ b/net/mac80211/ieee80211_i.h
+@@ -1300,10 +1300,12 @@ struct ieee80211_local {
+ u16 schedule_round[IEEE80211_NUM_ACS];
+
+ u16 airtime_flags;
++ u32 aql_txq_limit_bc;
+ u32 aql_txq_limit_low[IEEE80211_NUM_ACS];
+ u32 aql_txq_limit_high[IEEE80211_NUM_ACS];
+ u32 aql_threshold;
+ atomic_t aql_total_pending_airtime;
++ atomic_t aql_bc_pending_airtime;
+ atomic_t aql_ac_pending_airtime[IEEE80211_NUM_ACS];
+
+ const struct ieee80211_ops *ops;
+--- a/net/mac80211/main.c
++++ b/net/mac80211/main.c
+@@ -789,6 +789,7 @@ struct ieee80211_hw *ieee80211_alloc_hw_
+ spin_lock_init(&local->rx_path_lock);
+ spin_lock_init(&local->queue_stop_reason_lock);
+
++ local->aql_txq_limit_bc = IEEE80211_DEFAULT_AQL_TXQ_LIMIT_BC;
+ for (i = 0; i < IEEE80211_NUM_ACS; i++) {
+ INIT_LIST_HEAD(&local->active_txqs[i]);
+ spin_lock_init(&local->active_txq_lock[i]);
+--- a/net/mac80211/sta_info.c
++++ b/net/mac80211/sta_info.c
+@@ -2164,13 +2164,28 @@ EXPORT_SYMBOL(ieee80211_sta_recalc_aggre
+
+ void ieee80211_sta_update_pending_airtime(struct ieee80211_local *local,
+ struct sta_info *sta, u8 ac,
+- u16 tx_airtime, bool tx_completed)
++ u16 tx_airtime, bool tx_completed,
++ bool mcast)
+ {
+ int tx_pending;
+
+ if (!wiphy_ext_feature_isset(local->hw.wiphy, NL80211_EXT_FEATURE_AQL))
+ return;
+
++ if (mcast) {
++ if (!tx_completed) {
++ atomic_add(tx_airtime, &local->aql_bc_pending_airtime);
++ return;
++ }
++
++ tx_pending = atomic_sub_return(tx_airtime,
++ &local->aql_bc_pending_airtime);
++ if (tx_pending < 0)
++ atomic_cmpxchg(&local->aql_bc_pending_airtime,
++ tx_pending, 0);
++ return;
++ }
++
+ if (!tx_completed) {
+ if (sta)
+ atomic_add(tx_airtime,
+--- a/net/mac80211/tx.c
++++ b/net/mac80211/tx.c
+@@ -2553,7 +2553,7 @@ static u16 ieee80211_store_ack_skb(struc
+
+ spin_lock_irqsave(&local->ack_status_lock, flags);
+ id = idr_alloc(&local->ack_status_frames, ack_skb,
+- 1, 0x2000, GFP_ATOMIC);
++ 1, 0x1000, GFP_ATOMIC);
+ spin_unlock_irqrestore(&local->ack_status_lock, flags);
+
+ if (id >= 0) {
+@@ -3957,20 +3957,20 @@ begin:
+ encap_out:
+ IEEE80211_SKB_CB(skb)->control.vif = vif;
+
+- if (tx.sta &&
+- wiphy_ext_feature_isset(local->hw.wiphy, NL80211_EXT_FEATURE_AQL)) {
+- bool ampdu = txq->ac != IEEE80211_AC_VO;
++ if (wiphy_ext_feature_isset(local->hw.wiphy, NL80211_EXT_FEATURE_AQL)) {
++ bool ampdu = txq->sta && txq->ac != IEEE80211_AC_VO;
+ u32 airtime;
+
+ airtime = ieee80211_calc_expected_tx_airtime(hw, vif, txq->sta,
+ skb->len, ampdu);
+- if (airtime) {
+- airtime = ieee80211_info_set_tx_time_est(info, airtime);
+- ieee80211_sta_update_pending_airtime(local, tx.sta,
+- txq->ac,
+- airtime,
+- false);
+- }
++ if (!airtime)
++ return skb;
++
++ airtime = ieee80211_info_set_tx_time_est(info, airtime);
++ info->tx_time_mc = !tx.sta;
++ ieee80211_sta_update_pending_airtime(local, tx.sta, txq->ac,
++ airtime, false,
++ info->tx_time_mc);
+ }
+
+ return skb;
+@@ -4025,6 +4025,7 @@ struct ieee80211_txq *ieee80211_next_txq
+ struct ieee80211_txq *ret = NULL;
+ struct txq_info *txqi = NULL, *head = NULL;
+ bool found_eligible_txq = false;
++ bool aql_check;
+
+ spin_lock_bh(&local->active_txq_lock[ac]);
+
+@@ -4048,26 +4049,26 @@ struct ieee80211_txq *ieee80211_next_txq
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
+ if (!head)
+ head = txqi;
+
++ aql_check = ieee80211_txq_airtime_check(hw, &txqi->txq);
++ if (aql_check)
++ found_eligible_txq = true;
++
+ if (txqi->txq.sta) {
+ struct sta_info *sta = container_of(txqi->txq.sta,
+ struct sta_info, sta);
+- bool aql_check = ieee80211_txq_airtime_check(hw, &txqi->txq);
+- s32 deficit = ieee80211_sta_deficit(sta, txqi->txq.ac);
+-
+- if (aql_check)
+- found_eligible_txq = true;
+-
+- if (deficit < 0)
++ if (ieee80211_sta_deficit(sta, txqi->txq.ac) < 0) {
+ sta->airtime[txqi->txq.ac].deficit +=
+ sta->airtime_weight << AIRTIME_QUANTUM_SHIFT;
+-
+- if (deficit < 0 || !aql_check) {
+- list_move_tail(&txqi->schedule_order,
+- &local->active_txqs[txqi->txq.ac]);
+- goto begin;
++ aql_check = false;
+ }
+ }
+
++ if (!aql_check) {
++ list_move_tail(&txqi->schedule_order,
++ &local->active_txqs[txqi->txq.ac]);
++ goto begin;
++ }
++
+ if (txqi->schedule_round == local->schedule_round[ac])
+ goto out;
+
+@@ -4132,7 +4133,8 @@ bool ieee80211_txq_airtime_check(struct
+ return true;
+
+ if (!txq->sta)
+- return true;
++ return atomic_read(&local->aql_bc_pending_airtime) <
++ local->aql_txq_limit_bc;
+
+ if (unlikely(txq->tid == IEEE80211_NUM_TIDS))
+ return true;
+@@ -4181,15 +4183,15 @@ bool ieee80211_txq_may_transmit(struct i
+
+ spin_lock_bh(&local->active_txq_lock[ac]);
+
+- if (!txqi->txq.sta)
+- goto out;
+-
+ if (list_empty(&txqi->schedule_order))
+ goto out;
+
+ if (!ieee80211_txq_schedule_airtime_check(local, ac))
+ goto out;
+
++ if (!txqi->txq.sta)
++ goto out;
++
+ list_for_each_entry_safe(iter, tmp, &local->active_txqs[ac],
+ schedule_order) {
+ if (iter == txqi)
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
+--- a/include/net/mac80211.h
++++ b/include/net/mac80211.h
+@@ -1092,6 +1092,7 @@ ieee80211_rate_get_vht_nss(const struct
+ * link the frame will be transmitted on
+ * @hw_queue: HW queue to put the frame on, skb_get_queue_mapping() gives the AC
+ * @ack_frame_id: internal frame ID for TX status, used internally
+ * @tx_time_mc: TX time estimate is for a broadcast/multicast packet
+ * @tx_time_est: TX time estimate in units of 4us, used internally
+ * @control: union part for control data
+ * @control.rates: TX rates array to try
+@@ -1131,8 +1132,9 @@ struct ieee80211_tx_info {
+ /* common information */
+ u32 flags;
+ u32 band:3,
+- ack_frame_id:13,
++ ack_frame_id:12,
+ hw_queue:4,
++ tx_time_mc:1,
+ tx_time_est:10;
+ /* 2 free bits */
+
+--- a/net/mac80211/sta_info.h
++++ b/net/mac80211/sta_info.h
+@@ -147,7 +147,8 @@ struct airtime_info {
+
+ void ieee80211_sta_update_pending_airtime(struct ieee80211_local *local,
+ struct sta_info *sta, u8 ac,
+- u16 tx_airtime, bool tx_completed);
++ u16 tx_airtime, bool tx_completed,
++ bool mcast);
+
+ struct sta_info;
+
+--- a/net/mac80211/status.c
++++ b/net/mac80211/status.c
+@@ -716,7 +716,7 @@ static void ieee80211_report_used_skb(st
+ ieee80211_sta_update_pending_airtime(local, sta,
+ skb_get_queue_mapping(skb),
+ tx_time_est,
+- true);
++ true, info->tx_time_mc);
+ rcu_read_unlock();
+ }
+
+@@ -1127,10 +1127,11 @@ void ieee80211_tx_status_ext(struct ieee
+ /* Do this here to avoid the expensive lookup of the sta
+ * in ieee80211_report_used_skb().
+ */
++ bool mcast = IEEE80211_SKB_CB(skb)->tx_time_mc;
+ ieee80211_sta_update_pending_airtime(local, sta,
+ skb_get_queue_mapping(skb),
+ tx_time_est,
+- true);
++ true, mcast);
+ ieee80211_info_set_tx_time_est(IEEE80211_SKB_CB(skb), 0);
+ }
+