170 lines
		
	
	
		
			3.3 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			170 lines
		
	
	
		
			3.3 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
| // SPDX-License-Identifier: GPL-2.0
 | |
| /*
 | |
|  * fill_buf benchmark
 | |
|  *
 | |
|  * Copyright (C) 2018 Intel Corporation
 | |
|  *
 | |
|  * Authors:
 | |
|  *    Sai Praneeth Prakhya <sai.praneeth.prakhya@intel.com>,
 | |
|  *    Fenghua Yu <fenghua.yu@intel.com>
 | |
|  */
 | |
| #include <stdio.h>
 | |
| #include <unistd.h>
 | |
| #include <stdlib.h>
 | |
| #include <sys/types.h>
 | |
| #include <sys/wait.h>
 | |
| #include <inttypes.h>
 | |
| #include <string.h>
 | |
| 
 | |
| #include "resctrl.h"
 | |
| 
 | |
/* Cache line size, in bytes, assumed by the flush and fill loops below. */
#define CL_SIZE			(64)
/* Alignment, in bytes, requested for the benchmark buffer. */
#define PAGE_SIZE		(4 * 1024)
/* Number of bytes in one mebibyte. */
#define MB			(1024 * 1024)
 | |
| 
 | |
/*
 * sb - store barrier.
 *
 * On i386/x86_64 this executes SFENCE; the "memory" clobber additionally
 * keeps the compiler from moving memory accesses across the barrier.
 * On every other architecture the function body is empty.
 */
static void sb(void)
{
#if defined(__i386) || defined(__x86_64)
	__asm__ __volatile__("sfence\n\t" : : : "memory");
#endif
}
 | |
| 
 | |
/*
 * cl_flush - flush the cache line containing @p.
 * @p: any address inside the cache line to flush
 *
 * On i386/x86_64 this executes CLFLUSH on @p. On every other architecture
 * the function body is empty and @p is ignored.
 */
static void cl_flush(void *p)
{
#if defined(__i386) || defined(__x86_64)
	__asm__ __volatile__("clflush (%0)\n\t" : : "r"(p) : "memory");
#endif
}
 | |
| 
 | |
| void mem_flush(unsigned char *buf, size_t buf_size)
 | |
| {
 | |
| 	unsigned char *cp = buf;
 | |
| 	size_t i = 0;
 | |
| 
 | |
| 	buf_size = buf_size / CL_SIZE; /* mem size in cache lines */
 | |
| 
 | |
| 	for (i = 0; i < buf_size; i++)
 | |
| 		cl_flush(&cp[i * CL_SIZE]);
 | |
| 
 | |
| 	sb();
 | |
| }
 | |
| 
 | |
/*
 * Stride, in half-cacheline slots, used when walking the buffer for reads.
 * A non-sequential walk works around HW prefetching interfering with the
 * measurements.
 *
 * For the walk to visit every slot, the stride must share no common factor
 * with the number of slots. A prime stride guarantees that unless the slot
 * count happens to be a multiple of the prime itself.
 *
 * Some primes work better than others on some architectures (from MBA/MBM
 * result stability point of view).
 */
#define FILL_IDX_MULT	23
 | |
| 
 | |
| static int fill_one_span_read(unsigned char *buf, size_t buf_size)
 | |
| {
 | |
| 	unsigned int size = buf_size / (CL_SIZE / 2);
 | |
| 	unsigned int i, idx = 0;
 | |
| 	unsigned char sum = 0;
 | |
| 
 | |
| 	/*
 | |
| 	 * Read the buffer in an order that is unexpected by HW prefetching
 | |
| 	 * optimizations to prevent them interfering with the caching pattern.
 | |
| 	 *
 | |
| 	 * The read order is (in terms of halves of cachelines):
 | |
| 	 *	i * FILL_IDX_MULT % size
 | |
| 	 * The formula is open-coded below to avoiding modulo inside the loop
 | |
| 	 * as it improves MBA/MBM result stability on some architectures.
 | |
| 	 */
 | |
| 	for (i = 0; i < size; i++) {
 | |
| 		sum += buf[idx * (CL_SIZE / 2)];
 | |
| 
 | |
| 		idx += FILL_IDX_MULT;
 | |
| 		while (idx >= size)
 | |
| 			idx -= size;
 | |
| 	}
 | |
| 
 | |
| 	return sum;
 | |
| }
 | |
| 
 | |
| static void fill_one_span_write(unsigned char *buf, size_t buf_size)
 | |
| {
 | |
| 	unsigned char *end_ptr = buf + buf_size;
 | |
| 	unsigned char *p;
 | |
| 
 | |
| 	p = buf;
 | |
| 	while (p < end_ptr) {
 | |
| 		*p = '1';
 | |
| 		p += (CL_SIZE / 2);
 | |
| 	}
 | |
| }
 | |
| 
 | |
| void fill_cache_read(unsigned char *buf, size_t buf_size, bool once)
 | |
| {
 | |
| 	int ret = 0;
 | |
| 
 | |
| 	while (1) {
 | |
| 		ret = fill_one_span_read(buf, buf_size);
 | |
| 		if (once)
 | |
| 			break;
 | |
| 	}
 | |
| 
 | |
| 	/* Consume read result so that reading memory is not optimized out. */
 | |
| 	*value_sink = ret;
 | |
| }
 | |
| 
 | |
/*
 * fill_cache_write - run the write pattern over the buffer.
 * @buf:      buffer to write
 * @buf_size: buffer size in bytes
 * @once:     true to make a single pass; false to repeat forever
 *
 * When @once is false the loop never exits, so this function does not return.
 */
static void fill_cache_write(unsigned char *buf, size_t buf_size, bool once)
{
	do {
		fill_one_span_write(buf, buf_size);
	} while (!once);
}
 | |
| 
 | |
| unsigned char *alloc_buffer(size_t buf_size, int memflush)
 | |
| {
 | |
| 	void *buf = NULL;
 | |
| 	uint64_t *p64;
 | |
| 	size_t s64;
 | |
| 	int ret;
 | |
| 
 | |
| 	ret = posix_memalign(&buf, PAGE_SIZE, buf_size);
 | |
| 	if (ret < 0)
 | |
| 		return NULL;
 | |
| 
 | |
| 	/* Initialize the buffer */
 | |
| 	p64 = buf;
 | |
| 	s64 = buf_size / sizeof(uint64_t);
 | |
| 
 | |
| 	while (s64 > 0) {
 | |
| 		*p64 = (uint64_t)rand();
 | |
| 		p64 += (CL_SIZE / sizeof(uint64_t));
 | |
| 		s64 -= (CL_SIZE / sizeof(uint64_t));
 | |
| 	}
 | |
| 
 | |
| 	/* Flush the memory before using to avoid "cache hot pages" effect */
 | |
| 	if (memflush)
 | |
| 		mem_flush(buf, buf_size);
 | |
| 
 | |
| 	return buf;
 | |
| }
 | |
| 
 | |
/*
 * run_fill_buf - allocate a buffer and run the fill benchmark on it.
 * @buf_size: buffer size in bytes
 * @memflush: non-zero to flush the buffer from the caches after allocation
 * @op:       0 selects the read pattern, any other value the write pattern
 * @once:     true for a single pass; false to loop forever
 *
 * Return: 0 after a completed run, -1 if the buffer could not be allocated.
 * With @once false the selected fill loop never exits, so neither the free()
 * nor the return is reached.
 */
int run_fill_buf(size_t buf_size, int memflush, int op, bool once)
{
	unsigned char *buf = alloc_buffer(buf_size, memflush);

	if (!buf)
		return -1;

	if (op != 0)
		fill_cache_write(buf, buf_size, once);
	else
		fill_cache_read(buf, buf_size, once);

	free(buf);

	return 0;
}
 |