518 lines
		
	
	
		
			12 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			518 lines
		
	
	
		
			12 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
| // SPDX-License-Identifier: GPL-2.0
 | |
| 
 | |
| /* net/sched/sch_etf.c  Earliest TxTime First queueing discipline.
 | |
|  *
 | |
|  * Authors:	Jesus Sanchez-Palencia <jesus.sanchez-palencia@intel.com>
 | |
|  *		Vinicius Costa Gomes <vinicius.gomes@intel.com>
 | |
|  */
 | |
| 
 | |
| #include <linux/module.h>
 | |
| #include <linux/types.h>
 | |
| #include <linux/kernel.h>
 | |
| #include <linux/string.h>
 | |
| #include <linux/errno.h>
 | |
| #include <linux/errqueue.h>
 | |
| #include <linux/rbtree.h>
 | |
| #include <linux/skbuff.h>
 | |
| #include <linux/posix-timers.h>
 | |
| #include <net/netlink.h>
 | |
| #include <net/sch_generic.h>
 | |
| #include <net/pkt_sched.h>
 | |
| #include <net/sock.h>
 | |
| 
 | |
/* Flag tests on the ->flags member of the options object passed in.
 * Each expands to a non-zero value when ->flags has the matching
 * TC_ETF_* bit(s) set, and to zero otherwise.
 */
#define DEADLINE_MODE_IS_ON(opt) (((opt)->flags) & TC_ETF_DEADLINE_MODE_ON)
#define OFFLOAD_IS_ON(opt) (((opt)->flags) & TC_ETF_OFFLOAD_ON)
#define SKIP_SOCK_CHECK_IS_SET(opt) (((opt)->flags) & TC_ETF_SKIP_SOCK_CHECK)
 | |
| 
 | |
| struct etf_sched_data {
 | |
| 	bool offload;
 | |
| 	bool deadline_mode;
 | |
| 	bool skip_sock_check;
 | |
| 	int clockid;
 | |
| 	int queue;
 | |
| 	s32 delta; /* in ns */
 | |
| 	ktime_t last; /* The txtime of the last skb sent to the netdevice. */
 | |
| 	struct rb_root_cached head;
 | |
| 	struct qdisc_watchdog watchdog;
 | |
| 	ktime_t (*get_time)(void);
 | |
| };
 | |
| 
 | |
| static const struct nla_policy etf_policy[TCA_ETF_MAX + 1] = {
 | |
| 	[TCA_ETF_PARMS]	= { .len = sizeof(struct tc_etf_qopt) },
 | |
| };
 | |
| 
 | |
| static inline int validate_input_params(struct tc_etf_qopt *qopt,
 | |
| 					struct netlink_ext_ack *extack)
 | |
| {
 | |
| 	/* Check if params comply to the following rules:
 | |
| 	 *	* Clockid and delta must be valid.
 | |
| 	 *
 | |
| 	 *	* Dynamic clockids are not supported.
 | |
| 	 *
 | |
| 	 *	* Delta must be a positive integer.
 | |
| 	 *
 | |
| 	 * Also note that for the HW offload case, we must
 | |
| 	 * expect that system clocks have been synchronized to PHC.
 | |
| 	 */
 | |
| 	if (qopt->clockid < 0) {
 | |
| 		NL_SET_ERR_MSG(extack, "Dynamic clockids are not supported");
 | |
| 		return -ENOTSUPP;
 | |
| 	}
 | |
| 
 | |
| 	if (qopt->clockid != CLOCK_TAI) {
 | |
| 		NL_SET_ERR_MSG(extack, "Invalid clockid. CLOCK_TAI must be used");
 | |
| 		return -EINVAL;
 | |
| 	}
 | |
| 
 | |
| 	if (qopt->delta < 0) {
 | |
| 		NL_SET_ERR_MSG(extack, "Delta must be positive");
 | |
| 		return -EINVAL;
 | |
| 	}
 | |
| 
 | |
| 	return 0;
 | |
| }
 | |
| 
 | |
| static bool is_packet_valid(struct Qdisc *sch, struct sk_buff *nskb)
 | |
| {
 | |
| 	struct etf_sched_data *q = qdisc_priv(sch);
 | |
| 	ktime_t txtime = nskb->tstamp;
 | |
| 	struct sock *sk = nskb->sk;
 | |
| 	ktime_t now;
 | |
| 
 | |
| 	if (q->skip_sock_check)
 | |
| 		goto skip;
 | |
| 
 | |
| 	if (!sk || !sk_fullsock(sk))
 | |
| 		return false;
 | |
| 
 | |
| 	if (!sock_flag(sk, SOCK_TXTIME))
 | |
| 		return false;
 | |
| 
 | |
| 	/* We don't perform crosstimestamping.
 | |
| 	 * Drop if packet's clockid differs from qdisc's.
 | |
| 	 */
 | |
| 	if (sk->sk_clockid != q->clockid)
 | |
| 		return false;
 | |
| 
 | |
| 	if (sk->sk_txtime_deadline_mode != q->deadline_mode)
 | |
| 		return false;
 | |
| 
 | |
| skip:
 | |
| 	now = q->get_time();
 | |
| 	if (ktime_before(txtime, now) || ktime_before(txtime, q->last))
 | |
| 		return false;
 | |
| 
 | |
| 	return true;
 | |
| }
 | |
| 
 | |
| static struct sk_buff *etf_peek_timesortedlist(struct Qdisc *sch)
 | |
| {
 | |
| 	struct etf_sched_data *q = qdisc_priv(sch);
 | |
| 	struct rb_node *p;
 | |
| 
 | |
| 	p = rb_first_cached(&q->head);
 | |
| 	if (!p)
 | |
| 		return NULL;
 | |
| 
 | |
| 	return rb_to_skb(p);
 | |
| }
 | |
| 
 | |
| static void reset_watchdog(struct Qdisc *sch)
 | |
| {
 | |
| 	struct etf_sched_data *q = qdisc_priv(sch);
 | |
| 	struct sk_buff *skb = etf_peek_timesortedlist(sch);
 | |
| 	ktime_t next;
 | |
| 
 | |
| 	if (!skb) {
 | |
| 		qdisc_watchdog_cancel(&q->watchdog);
 | |
| 		return;
 | |
| 	}
 | |
| 
 | |
| 	next = ktime_sub_ns(skb->tstamp, q->delta);
 | |
| 	qdisc_watchdog_schedule_ns(&q->watchdog, ktime_to_ns(next));
 | |
| }
 | |
| 
 | |
| static void report_sock_error(struct sk_buff *skb, u32 err, u8 code)
 | |
| {
 | |
| 	struct sock_exterr_skb *serr;
 | |
| 	struct sk_buff *clone;
 | |
| 	ktime_t txtime = skb->tstamp;
 | |
| 	struct sock *sk = skb->sk;
 | |
| 
 | |
| 	if (!sk || !sk_fullsock(sk) || !(sk->sk_txtime_report_errors))
 | |
| 		return;
 | |
| 
 | |
| 	clone = skb_clone(skb, GFP_ATOMIC);
 | |
| 	if (!clone)
 | |
| 		return;
 | |
| 
 | |
| 	serr = SKB_EXT_ERR(clone);
 | |
| 	serr->ee.ee_errno = err;
 | |
| 	serr->ee.ee_origin = SO_EE_ORIGIN_TXTIME;
 | |
| 	serr->ee.ee_type = 0;
 | |
| 	serr->ee.ee_code = code;
 | |
| 	serr->ee.ee_pad = 0;
 | |
| 	serr->ee.ee_data = (txtime >> 32); /* high part of tstamp */
 | |
| 	serr->ee.ee_info = txtime; /* low part of tstamp */
 | |
| 
 | |
| 	if (sock_queue_err_skb(sk, clone))
 | |
| 		kfree_skb(clone);
 | |
| }
 | |
| 
 | |
| static int etf_enqueue_timesortedlist(struct sk_buff *nskb, struct Qdisc *sch,
 | |
| 				      struct sk_buff **to_free)
 | |
| {
 | |
| 	struct etf_sched_data *q = qdisc_priv(sch);
 | |
| 	struct rb_node **p = &q->head.rb_root.rb_node, *parent = NULL;
 | |
| 	ktime_t txtime = nskb->tstamp;
 | |
| 	bool leftmost = true;
 | |
| 
 | |
| 	if (!is_packet_valid(sch, nskb)) {
 | |
| 		report_sock_error(nskb, EINVAL,
 | |
| 				  SO_EE_CODE_TXTIME_INVALID_PARAM);
 | |
| 		return qdisc_drop(nskb, sch, to_free);
 | |
| 	}
 | |
| 
 | |
| 	while (*p) {
 | |
| 		struct sk_buff *skb;
 | |
| 
 | |
| 		parent = *p;
 | |
| 		skb = rb_to_skb(parent);
 | |
| 		if (ktime_compare(txtime, skb->tstamp) >= 0) {
 | |
| 			p = &parent->rb_right;
 | |
| 			leftmost = false;
 | |
| 		} else {
 | |
| 			p = &parent->rb_left;
 | |
| 		}
 | |
| 	}
 | |
| 	rb_link_node(&nskb->rbnode, parent, p);
 | |
| 	rb_insert_color_cached(&nskb->rbnode, &q->head, leftmost);
 | |
| 
 | |
| 	qdisc_qstats_backlog_inc(sch, nskb);
 | |
| 	sch->q.qlen++;
 | |
| 
 | |
| 	/* Now we may need to re-arm the qdisc watchdog for the next packet. */
 | |
| 	reset_watchdog(sch);
 | |
| 
 | |
| 	return NET_XMIT_SUCCESS;
 | |
| }
 | |
| 
 | |
| static void timesortedlist_drop(struct Qdisc *sch, struct sk_buff *skb,
 | |
| 				ktime_t now)
 | |
| {
 | |
| 	struct etf_sched_data *q = qdisc_priv(sch);
 | |
| 	struct sk_buff *to_free = NULL;
 | |
| 	struct sk_buff *tmp = NULL;
 | |
| 
 | |
| 	skb_rbtree_walk_from_safe(skb, tmp) {
 | |
| 		if (ktime_after(skb->tstamp, now))
 | |
| 			break;
 | |
| 
 | |
| 		rb_erase_cached(&skb->rbnode, &q->head);
 | |
| 
 | |
| 		/* The rbnode field in the skb re-uses these fields, now that
 | |
| 		 * we are done with the rbnode, reset them.
 | |
| 		 */
 | |
| 		skb->next = NULL;
 | |
| 		skb->prev = NULL;
 | |
| 		skb->dev = qdisc_dev(sch);
 | |
| 
 | |
| 		report_sock_error(skb, ECANCELED, SO_EE_CODE_TXTIME_MISSED);
 | |
| 
 | |
| 		qdisc_qstats_backlog_dec(sch, skb);
 | |
| 		qdisc_drop(skb, sch, &to_free);
 | |
| 		qdisc_qstats_overlimit(sch);
 | |
| 		sch->q.qlen--;
 | |
| 	}
 | |
| 
 | |
| 	kfree_skb_list(to_free);
 | |
| }
 | |
| 
 | |
| static void timesortedlist_remove(struct Qdisc *sch, struct sk_buff *skb)
 | |
| {
 | |
| 	struct etf_sched_data *q = qdisc_priv(sch);
 | |
| 
 | |
| 	rb_erase_cached(&skb->rbnode, &q->head);
 | |
| 
 | |
| 	/* The rbnode field in the skb re-uses these fields, now that
 | |
| 	 * we are done with the rbnode, reset them.
 | |
| 	 */
 | |
| 	skb->next = NULL;
 | |
| 	skb->prev = NULL;
 | |
| 	skb->dev = qdisc_dev(sch);
 | |
| 
 | |
| 	qdisc_qstats_backlog_dec(sch, skb);
 | |
| 
 | |
| 	qdisc_bstats_update(sch, skb);
 | |
| 
 | |
| 	q->last = skb->tstamp;
 | |
| 
 | |
| 	sch->q.qlen--;
 | |
| }
 | |
| 
 | |
| static struct sk_buff *etf_dequeue_timesortedlist(struct Qdisc *sch)
 | |
| {
 | |
| 	struct etf_sched_data *q = qdisc_priv(sch);
 | |
| 	struct sk_buff *skb;
 | |
| 	ktime_t now, next;
 | |
| 
 | |
| 	skb = etf_peek_timesortedlist(sch);
 | |
| 	if (!skb)
 | |
| 		return NULL;
 | |
| 
 | |
| 	now = q->get_time();
 | |
| 
 | |
| 	/* Drop if packet has expired while in queue. */
 | |
| 	if (ktime_before(skb->tstamp, now)) {
 | |
| 		timesortedlist_drop(sch, skb, now);
 | |
| 		skb = NULL;
 | |
| 		goto out;
 | |
| 	}
 | |
| 
 | |
| 	/* When in deadline mode, dequeue as soon as possible and change the
 | |
| 	 * txtime from deadline to (now + delta).
 | |
| 	 */
 | |
| 	if (q->deadline_mode) {
 | |
| 		timesortedlist_remove(sch, skb);
 | |
| 		skb->tstamp = now;
 | |
| 		goto out;
 | |
| 	}
 | |
| 
 | |
| 	next = ktime_sub_ns(skb->tstamp, q->delta);
 | |
| 
 | |
| 	/* Dequeue only if now is within the [txtime - delta, txtime] range. */
 | |
| 	if (ktime_after(now, next))
 | |
| 		timesortedlist_remove(sch, skb);
 | |
| 	else
 | |
| 		skb = NULL;
 | |
| 
 | |
| out:
 | |
| 	/* Now we may need to re-arm the qdisc watchdog for the next packet. */
 | |
| 	reset_watchdog(sch);
 | |
| 
 | |
| 	return skb;
 | |
| }
 | |
| 
 | |
| static void etf_disable_offload(struct net_device *dev,
 | |
| 				struct etf_sched_data *q)
 | |
| {
 | |
| 	struct tc_etf_qopt_offload etf = { };
 | |
| 	const struct net_device_ops *ops;
 | |
| 	int err;
 | |
| 
 | |
| 	if (!q->offload)
 | |
| 		return;
 | |
| 
 | |
| 	ops = dev->netdev_ops;
 | |
| 	if (!ops->ndo_setup_tc)
 | |
| 		return;
 | |
| 
 | |
| 	etf.queue = q->queue;
 | |
| 	etf.enable = 0;
 | |
| 
 | |
| 	err = ops->ndo_setup_tc(dev, TC_SETUP_QDISC_ETF, &etf);
 | |
| 	if (err < 0)
 | |
| 		pr_warn("Couldn't disable ETF offload for queue %d\n",
 | |
| 			etf.queue);
 | |
| }
 | |
| 
 | |
| static int etf_enable_offload(struct net_device *dev, struct etf_sched_data *q,
 | |
| 			      struct netlink_ext_ack *extack)
 | |
| {
 | |
| 	const struct net_device_ops *ops = dev->netdev_ops;
 | |
| 	struct tc_etf_qopt_offload etf = { };
 | |
| 	int err;
 | |
| 
 | |
| 	if (!ops->ndo_setup_tc) {
 | |
| 		NL_SET_ERR_MSG(extack, "Specified device does not support ETF offload");
 | |
| 		return -EOPNOTSUPP;
 | |
| 	}
 | |
| 
 | |
| 	etf.queue = q->queue;
 | |
| 	etf.enable = 1;
 | |
| 
 | |
| 	err = ops->ndo_setup_tc(dev, TC_SETUP_QDISC_ETF, &etf);
 | |
| 	if (err < 0) {
 | |
| 		NL_SET_ERR_MSG(extack, "Specified device failed to setup ETF hardware offload");
 | |
| 		return err;
 | |
| 	}
 | |
| 
 | |
| 	return 0;
 | |
| }
 | |
| 
 | |
| static int etf_init(struct Qdisc *sch, struct nlattr *opt,
 | |
| 		    struct netlink_ext_ack *extack)
 | |
| {
 | |
| 	struct etf_sched_data *q = qdisc_priv(sch);
 | |
| 	struct net_device *dev = qdisc_dev(sch);
 | |
| 	struct nlattr *tb[TCA_ETF_MAX + 1];
 | |
| 	struct tc_etf_qopt *qopt;
 | |
| 	int err;
 | |
| 
 | |
| 	if (!opt) {
 | |
| 		NL_SET_ERR_MSG(extack,
 | |
| 			       "Missing ETF qdisc options which are mandatory");
 | |
| 		return -EINVAL;
 | |
| 	}
 | |
| 
 | |
| 	err = nla_parse_nested_deprecated(tb, TCA_ETF_MAX, opt, etf_policy,
 | |
| 					  extack);
 | |
| 	if (err < 0)
 | |
| 		return err;
 | |
| 
 | |
| 	if (!tb[TCA_ETF_PARMS]) {
 | |
| 		NL_SET_ERR_MSG(extack, "Missing mandatory ETF parameters");
 | |
| 		return -EINVAL;
 | |
| 	}
 | |
| 
 | |
| 	qopt = nla_data(tb[TCA_ETF_PARMS]);
 | |
| 
 | |
| 	pr_debug("delta %d clockid %d offload %s deadline %s\n",
 | |
| 		 qopt->delta, qopt->clockid,
 | |
| 		 OFFLOAD_IS_ON(qopt) ? "on" : "off",
 | |
| 		 DEADLINE_MODE_IS_ON(qopt) ? "on" : "off");
 | |
| 
 | |
| 	err = validate_input_params(qopt, extack);
 | |
| 	if (err < 0)
 | |
| 		return err;
 | |
| 
 | |
| 	q->queue = sch->dev_queue - netdev_get_tx_queue(dev, 0);
 | |
| 
 | |
| 	if (OFFLOAD_IS_ON(qopt)) {
 | |
| 		err = etf_enable_offload(dev, q, extack);
 | |
| 		if (err < 0)
 | |
| 			return err;
 | |
| 	}
 | |
| 
 | |
| 	/* Everything went OK, save the parameters used. */
 | |
| 	q->delta = qopt->delta;
 | |
| 	q->clockid = qopt->clockid;
 | |
| 	q->offload = OFFLOAD_IS_ON(qopt);
 | |
| 	q->deadline_mode = DEADLINE_MODE_IS_ON(qopt);
 | |
| 	q->skip_sock_check = SKIP_SOCK_CHECK_IS_SET(qopt);
 | |
| 
 | |
| 	switch (q->clockid) {
 | |
| 	case CLOCK_REALTIME:
 | |
| 		q->get_time = ktime_get_real;
 | |
| 		break;
 | |
| 	case CLOCK_MONOTONIC:
 | |
| 		q->get_time = ktime_get;
 | |
| 		break;
 | |
| 	case CLOCK_BOOTTIME:
 | |
| 		q->get_time = ktime_get_boottime;
 | |
| 		break;
 | |
| 	case CLOCK_TAI:
 | |
| 		q->get_time = ktime_get_clocktai;
 | |
| 		break;
 | |
| 	default:
 | |
| 		NL_SET_ERR_MSG(extack, "Clockid is not supported");
 | |
| 		return -ENOTSUPP;
 | |
| 	}
 | |
| 
 | |
| 	qdisc_watchdog_init_clockid(&q->watchdog, sch, q->clockid);
 | |
| 
 | |
| 	return 0;
 | |
| }
 | |
| 
 | |
| static void timesortedlist_clear(struct Qdisc *sch)
 | |
| {
 | |
| 	struct etf_sched_data *q = qdisc_priv(sch);
 | |
| 	struct rb_node *p = rb_first_cached(&q->head);
 | |
| 
 | |
| 	while (p) {
 | |
| 		struct sk_buff *skb = rb_to_skb(p);
 | |
| 
 | |
| 		p = rb_next(p);
 | |
| 
 | |
| 		rb_erase_cached(&skb->rbnode, &q->head);
 | |
| 		rtnl_kfree_skbs(skb, skb);
 | |
| 		sch->q.qlen--;
 | |
| 	}
 | |
| }
 | |
| 
 | |
| static void etf_reset(struct Qdisc *sch)
 | |
| {
 | |
| 	struct etf_sched_data *q = qdisc_priv(sch);
 | |
| 
 | |
| 	/* Only cancel watchdog if it's been initialized. */
 | |
| 	if (q->watchdog.qdisc == sch)
 | |
| 		qdisc_watchdog_cancel(&q->watchdog);
 | |
| 
 | |
| 	/* No matter which mode we are on, it's safe to clear both lists. */
 | |
| 	timesortedlist_clear(sch);
 | |
| 	__qdisc_reset_queue(&sch->q);
 | |
| 
 | |
| 	q->last = 0;
 | |
| }
 | |
| 
 | |
| static void etf_destroy(struct Qdisc *sch)
 | |
| {
 | |
| 	struct etf_sched_data *q = qdisc_priv(sch);
 | |
| 	struct net_device *dev = qdisc_dev(sch);
 | |
| 
 | |
| 	/* Only cancel watchdog if it's been initialized. */
 | |
| 	if (q->watchdog.qdisc == sch)
 | |
| 		qdisc_watchdog_cancel(&q->watchdog);
 | |
| 
 | |
| 	etf_disable_offload(dev, q);
 | |
| }
 | |
| 
 | |
| static int etf_dump(struct Qdisc *sch, struct sk_buff *skb)
 | |
| {
 | |
| 	struct etf_sched_data *q = qdisc_priv(sch);
 | |
| 	struct tc_etf_qopt opt = { };
 | |
| 	struct nlattr *nest;
 | |
| 
 | |
| 	nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
 | |
| 	if (!nest)
 | |
| 		goto nla_put_failure;
 | |
| 
 | |
| 	opt.delta = READ_ONCE(q->delta);
 | |
| 	opt.clockid = READ_ONCE(q->clockid);
 | |
| 	if (READ_ONCE(q->offload))
 | |
| 		opt.flags |= TC_ETF_OFFLOAD_ON;
 | |
| 
 | |
| 	if (READ_ONCE(q->deadline_mode))
 | |
| 		opt.flags |= TC_ETF_DEADLINE_MODE_ON;
 | |
| 
 | |
| 	if (READ_ONCE(q->skip_sock_check))
 | |
| 		opt.flags |= TC_ETF_SKIP_SOCK_CHECK;
 | |
| 
 | |
| 	if (nla_put(skb, TCA_ETF_PARMS, sizeof(opt), &opt))
 | |
| 		goto nla_put_failure;
 | |
| 
 | |
| 	return nla_nest_end(skb, nest);
 | |
| 
 | |
| nla_put_failure:
 | |
| 	nla_nest_cancel(skb, nest);
 | |
| 	return -1;
 | |
| }
 | |
| 
 | |
| static struct Qdisc_ops etf_qdisc_ops __read_mostly = {
 | |
| 	.id		=	"etf",
 | |
| 	.priv_size	=	sizeof(struct etf_sched_data),
 | |
| 	.enqueue	=	etf_enqueue_timesortedlist,
 | |
| 	.dequeue	=	etf_dequeue_timesortedlist,
 | |
| 	.peek		=	etf_peek_timesortedlist,
 | |
| 	.init		=	etf_init,
 | |
| 	.reset		=	etf_reset,
 | |
| 	.destroy	=	etf_destroy,
 | |
| 	.dump		=	etf_dump,
 | |
| 	.owner		=	THIS_MODULE,
 | |
| };
 | |
| MODULE_ALIAS_NET_SCH("etf");
 | |
| 
 | |
| static int __init etf_module_init(void)
 | |
| {
 | |
| 	return register_qdisc(&etf_qdisc_ops);
 | |
| }
 | |
| 
 | |
| static void __exit etf_module_exit(void)
 | |
| {
 | |
| 	unregister_qdisc(&etf_qdisc_ops);
 | |
| }
 | |
| module_init(etf_module_init)
 | |
| module_exit(etf_module_exit)
 | |
| MODULE_LICENSE("GPL");
 | |
| MODULE_DESCRIPTION("Earliest TxTime First (ETF) qdisc");
 |