271 lines
		
	
	
		
			5.8 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			271 lines
		
	
	
		
			5.8 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
| // SPDX-License-Identifier: GPL-2.0-only
 | |
| /*
 | |
|  * Copyright (C) 2016 Red Hat, Inc.
 | |
|  * Author: Michael S. Tsirkin <mst@redhat.com>
 | |
|  *
 | |
|  * Simple descriptor-based ring. virtio 0.9 compatible event index is used for
 | |
|  * signalling, unconditionally.
 | |
|  */
 | |
| #define _GNU_SOURCE
 | |
| #include "main.h"
 | |
| #include <stdlib.h>
 | |
| #include <stdio.h>
 | |
| #include <string.h>
 | |
| 
 | |
| /* Next - Where next entry will be written.
 | |
|  * Prev - "Next" value when event triggered previously.
 | |
|  * Event - Peer requested event after writing this entry.
 | |
|  */
 | |
| static inline bool need_event(unsigned short event,
 | |
| 			      unsigned short next,
 | |
| 			      unsigned short prev)
 | |
| {
 | |
| 	return (unsigned short)(next - event - 1) < (unsigned short)(next - prev);
 | |
| }
 | |
| 
 | |
| /* Design:
 | |
|  * Guest adds descriptors with unique index values and DESC_HW in flags.
 | |
|  * Host overwrites used descriptors with correct len, index, and DESC_HW clear.
 | |
|  * Flags are always set last.
 | |
|  */
 | |
| #define DESC_HW 0x1
 | |
| 
 | |
| struct desc {
 | |
| 	unsigned short flags;
 | |
| 	unsigned short index;
 | |
| 	unsigned len;
 | |
| 	unsigned long long addr;
 | |
| };
 | |
| 
 | |
| /* how much padding is needed to avoid false cache sharing */
 | |
| #define HOST_GUEST_PADDING 0x80
 | |
| 
 | |
| /* Mostly read */
 | |
| struct event {
 | |
| 	unsigned short kick_index;
 | |
| 	unsigned char reserved0[HOST_GUEST_PADDING - 2];
 | |
| 	unsigned short call_index;
 | |
| 	unsigned char reserved1[HOST_GUEST_PADDING - 2];
 | |
| };
 | |
| 
 | |
| struct data {
 | |
| 	void *buf; /* descriptor is writeable, we can't get buf from there */
 | |
| 	void *data;
 | |
| } *data;
 | |
| 
 | |
| struct desc *ring;
 | |
| struct event *event;
 | |
| 
 | |
| struct guest {
 | |
| 	unsigned avail_idx;
 | |
| 	unsigned last_used_idx;
 | |
| 	unsigned num_free;
 | |
| 	unsigned kicked_avail_idx;
 | |
| 	unsigned char reserved[HOST_GUEST_PADDING - 12];
 | |
| } guest;
 | |
| 
 | |
| struct host {
 | |
| 	/* we do not need to track last avail index
 | |
| 	 * unless we have more than one in flight.
 | |
| 	 */
 | |
| 	unsigned used_idx;
 | |
| 	unsigned called_used_idx;
 | |
| 	unsigned char reserved[HOST_GUEST_PADDING - 4];
 | |
| } host;
 | |
| 
 | |
| /* implemented by ring */
 | |
| void alloc_ring(void)
 | |
| {
 | |
| 	int ret;
 | |
| 	int i;
 | |
| 
 | |
| 	ret = posix_memalign((void **)&ring, 0x1000, ring_size * sizeof *ring);
 | |
| 	if (ret) {
 | |
| 		perror("Unable to allocate ring buffer.\n");
 | |
| 		exit(3);
 | |
| 	}
 | |
| 	event = calloc(1, sizeof(*event));
 | |
| 	if (!event) {
 | |
| 		perror("Unable to allocate event buffer.\n");
 | |
| 		exit(3);
 | |
| 	}
 | |
| 	guest.avail_idx = 0;
 | |
| 	guest.kicked_avail_idx = -1;
 | |
| 	guest.last_used_idx = 0;
 | |
| 	host.used_idx = 0;
 | |
| 	host.called_used_idx = -1;
 | |
| 	for (i = 0; i < ring_size; ++i) {
 | |
| 		struct desc desc = {
 | |
| 			.index = i,
 | |
| 		};
 | |
| 		ring[i] = desc;
 | |
| 	}
 | |
| 	guest.num_free = ring_size;
 | |
| 	data = calloc(ring_size, sizeof(*data));
 | |
| 	if (!data) {
 | |
| 		perror("Unable to allocate data buffer.\n");
 | |
| 		exit(3);
 | |
| 	}
 | |
| }
 | |
| 
 | |
| /* guest side */
 | |
| int add_inbuf(unsigned len, void *buf, void *datap)
 | |
| {
 | |
| 	unsigned head, index;
 | |
| 
 | |
| 	if (!guest.num_free)
 | |
| 		return -1;
 | |
| 
 | |
| 	guest.num_free--;
 | |
| 	head = (ring_size - 1) & (guest.avail_idx++);
 | |
| 
 | |
| 	/* Start with a write. On MESI architectures this helps
 | |
| 	 * avoid a shared state with consumer that is polling this descriptor.
 | |
| 	 */
 | |
| 	ring[head].addr = (unsigned long)(void*)buf;
 | |
| 	ring[head].len = len;
 | |
| 	/* read below might bypass write above. That is OK because it's just an
 | |
| 	 * optimization. If this happens, we will get the cache line in a
 | |
| 	 * shared state which is unfortunate, but probably not worth it to
 | |
| 	 * add an explicit full barrier to avoid this.
 | |
| 	 */
 | |
| 	barrier();
 | |
| 	index = ring[head].index;
 | |
| 	data[index].buf = buf;
 | |
| 	data[index].data = datap;
 | |
| 	/* Barrier A (for pairing) */
 | |
| 	smp_release();
 | |
| 	ring[head].flags = DESC_HW;
 | |
| 
 | |
| 	return 0;
 | |
| }
 | |
| 
 | |
| void *get_buf(unsigned *lenp, void **bufp)
 | |
| {
 | |
| 	unsigned head = (ring_size - 1) & guest.last_used_idx;
 | |
| 	unsigned index;
 | |
| 	void *datap;
 | |
| 
 | |
| 	if (ring[head].flags & DESC_HW)
 | |
| 		return NULL;
 | |
| 	/* Barrier B (for pairing) */
 | |
| 	smp_acquire();
 | |
| 	*lenp = ring[head].len;
 | |
| 	index = ring[head].index & (ring_size - 1);
 | |
| 	datap = data[index].data;
 | |
| 	*bufp = data[index].buf;
 | |
| 	data[index].buf = NULL;
 | |
| 	data[index].data = NULL;
 | |
| 	guest.num_free++;
 | |
| 	guest.last_used_idx++;
 | |
| 	return datap;
 | |
| }
 | |
| 
 | |
| bool used_empty()
 | |
| {
 | |
| 	unsigned head = (ring_size - 1) & guest.last_used_idx;
 | |
| 
 | |
| 	return (ring[head].flags & DESC_HW);
 | |
| }
 | |
| 
 | |
| void disable_call()
 | |
| {
 | |
| 	/* Doing nothing to disable calls might cause
 | |
| 	 * extra interrupts, but reduces the number of cache misses.
 | |
| 	 */
 | |
| }
 | |
| 
 | |
| bool enable_call()
 | |
| {
 | |
| 	event->call_index = guest.last_used_idx;
 | |
| 	/* Flush call index write */
 | |
| 	/* Barrier D (for pairing) */
 | |
| 	smp_mb();
 | |
| 	return used_empty();
 | |
| }
 | |
| 
 | |
| void kick_available(void)
 | |
| {
 | |
| 	bool need;
 | |
| 
 | |
| 	/* Flush in previous flags write */
 | |
| 	/* Barrier C (for pairing) */
 | |
| 	smp_mb();
 | |
| 	need = need_event(event->kick_index,
 | |
| 			   guest.avail_idx,
 | |
| 			   guest.kicked_avail_idx);
 | |
| 
 | |
| 	guest.kicked_avail_idx = guest.avail_idx;
 | |
| 	if (need)
 | |
| 		kick();
 | |
| }
 | |
| 
 | |
| /* host side */
 | |
| void disable_kick()
 | |
| {
 | |
| 	/* Doing nothing to disable kicks might cause
 | |
| 	 * extra interrupts, but reduces the number of cache misses.
 | |
| 	 */
 | |
| }
 | |
| 
 | |
| bool enable_kick()
 | |
| {
 | |
| 	event->kick_index = host.used_idx;
 | |
| 	/* Barrier C (for pairing) */
 | |
| 	smp_mb();
 | |
| 	return avail_empty();
 | |
| }
 | |
| 
 | |
| bool avail_empty()
 | |
| {
 | |
| 	unsigned head = (ring_size - 1) & host.used_idx;
 | |
| 
 | |
| 	return !(ring[head].flags & DESC_HW);
 | |
| }
 | |
| 
 | |
| bool use_buf(unsigned *lenp, void **bufp)
 | |
| {
 | |
| 	unsigned head = (ring_size - 1) & host.used_idx;
 | |
| 
 | |
| 	if (!(ring[head].flags & DESC_HW))
 | |
| 		return false;
 | |
| 
 | |
| 	/* make sure length read below is not speculated */
 | |
| 	/* Barrier A (for pairing) */
 | |
| 	smp_acquire();
 | |
| 
 | |
| 	/* simple in-order completion: we don't need
 | |
| 	 * to touch index at all. This also means we
 | |
| 	 * can just modify the descriptor in-place.
 | |
| 	 */
 | |
| 	ring[head].len--;
 | |
| 	/* Make sure len is valid before flags.
 | |
| 	 * Note: alternative is to write len and flags in one access -
 | |
| 	 * possible on 64 bit architectures but wmb is free on Intel anyway
 | |
| 	 * so I have no way to test whether it's a gain.
 | |
| 	 */
 | |
| 	/* Barrier B (for pairing) */
 | |
| 	smp_release();
 | |
| 	ring[head].flags = 0;
 | |
| 	host.used_idx++;
 | |
| 	return true;
 | |
| }
 | |
| 
 | |
| void call_used(void)
 | |
| {
 | |
| 	bool need;
 | |
| 
 | |
| 	/* Flush in previous flags write */
 | |
| 	/* Barrier D (for pairing) */
 | |
| 	smp_mb();
 | |
| 
 | |
| 	need = need_event(event->call_index,
 | |
| 			host.used_idx,
 | |
| 			host.called_used_idx);
 | |
| 
 | |
| 	host.called_used_idx = host.used_idx;
 | |
| 
 | |
| 	if (need)
 | |
| 		call();
 | |
| }
 |