Index: a/src/mono/mono/utils/memfuncs.c =================================================================== --- a/src/mono/mono/utils/memfuncs.c +++ b/src/mono/mono/utils/memfuncs.c @@ -343,6 +343,9 @@ mono_determine_physical_ram_available_si host_page_size (host, &page_size); return (guint64) vmstat.free_count * page_size; +#elif defined (__FreeBSD__) || defined (__linux__) || defined (__APPLE__) + return (getPhysicalMemoryAvail()); + #elif defined (HAVE_SYSCONF) gint64 page_size = -1, num_pages = -1; Index: a/src/mono/mono/utils/memfuncs.h =================================================================== --- a/src/mono/mono/utils/memfuncs.h +++ b/src/mono/mono/utils/memfuncs.h @@ -24,5 +24,10 @@ MONO_COMPONENT_API void mono_gc_memmove_ void mono_gc_memmove_aligned (void *dest, const void *src, size_t size); guint64 mono_determine_physical_ram_size (void); guint64 mono_determine_physical_ram_available_size (void); +#if defined (__FreeBSD__) || defined (__linux__) || defined (__APPLE__) +size_t getRestrictedPhysicalMemoryLimit(void); +gboolean getPhysicalMemoryUsed(size_t *); +size_t getPhysicalMemoryAvail(void); +#endif #endif Index: a/src/mono/mono/utils/mono-cgroup.c =================================================================== --- /dev/null +++ b/src/mono/mono/utils/mono-cgroup.c @@ -0,0 +1,709 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+
+/*++
+
+Module Name:
+
+    mono-cgroup.c
+
+Abstract:
+    Read the memory limit for the current process
+--*/
+#ifdef __FreeBSD__
+#define _WITH_GETLINE
+#endif
+// NOTE(review): the header names of the #include lines below are missing in this copy of the patch -- restore them from the upstream file.
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#if defined(__APPLE__) || defined(__FreeBSD__)
+#include
+#include
+#else
+#include
+#endif
+#include
+#include
+
+#ifndef SIZE_T_MAX
+# define SIZE_T_MAX (~(size_t)0)
+#endif
+// File system magic numbers compared against statfs().f_type in findCGroupVersion().
+#define CGROUP2_SUPER_MAGIC 0x63677270
+#define TMPFS_MAGIC 0x01021994
+// Files read under /proc, and file names appended to the memory cgroup path.
+#define PROC_MOUNTINFO_FILENAME "/proc/self/mountinfo"
+#define PROC_CGROUP_FILENAME "/proc/self/cgroup"
+#define PROC_STATM_FILENAME "/proc/self/statm"
+#define CGROUP1_MEMORY_LIMIT_FILENAME "/memory.limit_in_bytes"
+#define CGROUP2_MEMORY_LIMIT_FILENAME "/memory.max"
+#define CGROUP_MEMORY_STAT_FILENAME "/memory.stat"
+
+static void initialize(void);
+static gboolean readMemoryValueFromFile(const char *, guint64 *);
+static gboolean getPhysicalMemoryLimit(guint64 *);
+static gboolean getPhysicalMemoryUsage(size_t *);
+static int findCGroupVersion(void);
+static gboolean isCGroup1MemorySubsystem(const char *);
+static char *findCGroupPath(gboolean (*is_subsystem)(const char *));
+static void findHierarchyMount(gboolean (*is_subsystem)(const char *), char **, char **);
+static char *findCGroupPathForSubsystem(gboolean (*is_subsystem)(const char *));
+static gboolean getCGroupMemoryLimit(guint64 *, const char *);
+static gboolean getCGroupMemoryUsage(size_t *);
+static size_t getPhysicalMemoryTotal(guint64);
+
+size_t getRestrictedPhysicalMemoryLimit(void);
+gboolean getPhysicalMemoryUsed(size_t *);
+size_t getPhysicalMemoryAvail(void);
+
+// the cgroup version number or 0 to indicate cgroups are not found or not enabled
+static int s_cgroup_version;
+// path of the memory cgroup directory built by findCGroupPath(); assigned in initialize(), NULL when none was found
+static char *s_memory_cgroup_path = NULL;
+// memory.stat keys (and their lengths) that getCGroupMemoryUsage() sums; filled in by initialize()
+static const char *s_mem_stat_key_names[4];
+static size_t s_mem_stat_key_lengths[4];
+static size_t s_mem_stat_n_keys = 0; // stays 0 until initialize() has run; used as the "initialized" flag
+static long pageSize; // result of sysconf(_SC_PAGE_SIZE), assigned in initialize()
+
+/**
+ * @initialize
+ *
+ * 
Initialize variables used by the calculation routines.
+ */
+static void
+initialize()
+{
+	s_cgroup_version = findCGroupVersion();
+	s_memory_cgroup_path = findCGroupPath(s_cgroup_version == 1 ? &isCGroup1MemorySubsystem : NULL);
+
+	if (s_cgroup_version == 1) {
+		s_mem_stat_n_keys = 4;
+		s_mem_stat_key_names[0] = "total_inactive_anon ";
+		s_mem_stat_key_names[1] = "total_active_anon ";
+		s_mem_stat_key_names[2] = "total_dirty ";
+		s_mem_stat_key_names[3] = "total_unevictable ";
+	} else {
+		s_mem_stat_n_keys = 3;
+		s_mem_stat_key_names[0] = "anon ";
+		s_mem_stat_key_names[1] = "file_dirty ";
+		s_mem_stat_key_names[2] = "unevictable ";
+	}
+	// Cache the key lengths so getCGroupMemoryUsage() can prefix-match with strncmp().
+	for (size_t i = 0; i < s_mem_stat_n_keys; i++)
+		s_mem_stat_key_lengths[i] = strlen(s_mem_stat_key_names[i]);
+
+	pageSize = sysconf(_SC_PAGE_SIZE);
+}
+
+/**
+ * @readMemoryValueFromFile
+ *
+ * @param[in] filename - name of file containing value
+ * @param[out] val - receives the scaled value; when NULL nothing is opened and FALSE is returned
+ * @returns TRUE if a number was parsed and scaling it did not overflow, FALSE otherwise
+ *
+ * Read the first line of the file as a number with an optional k/m/g suffix (powers of 1024)
+ */
+static gboolean
+readMemoryValueFromFile(const char* filename, guint64* val)
+{
+	gboolean result = FALSE;
+	char *line = NULL;
+	size_t lineLen = 0;
+	char *endptr = NULL;
+	guint64 num = 0, multiplier;
+	FILE *file = NULL;
+	// Only touch the file when there is somewhere to store the result.
+	if (val != NULL) {
+		file = fopen(filename, "r");
+		if (file != NULL) {
+			if (getline(&line, &lineLen, file) != -1) {
+				errno = 0;
+				num = strtoull(line, &endptr, 0);
+				if (line != endptr && errno == 0) { // at least one digit consumed, no range error
+					multiplier = 1;
+
+					switch (*endptr)
+					{
+					case 'g':
+					case 'G':
+						multiplier = 1024; /* fallthrough */
+					case 'm':
+					case 'M':
+						multiplier = multiplier * 1024; /* fallthrough */
+					case 'k':
+					case 'K':
+						multiplier = multiplier * 1024;
+					}
+
+					*val = num * multiplier;
+					result = TRUE;
+					if (*val / multiplier != num) // the multiplication wrapped around
+						result = FALSE;
+				}
+			}
+		}
+	}
+
+	if (file)
+		fclose(file);
+	free(line);
+	return result;
+}
+
+/**
+ * @getPhysicalMemoryLimit
+ *
+ * @param[out] val - pointer to the result area
+ * @returns 
TRUE if a limit value was read, FALSE otherwise
+ *
+ * Interrogate the cgroup memory values to determine if there's
+ * a limit on physical memory.
+ */
+static gboolean
+getPhysicalMemoryLimit(guint64 *val)
+{
+	if (s_mem_stat_n_keys == 0) // still 0 means initialize() has not run yet
+		initialize();
+
+	if (s_cgroup_version == 0)
+		return FALSE;
+	else if (s_cgroup_version == 1)
+		return getCGroupMemoryLimit(val, CGROUP1_MEMORY_LIMIT_FILENAME);
+	else if (s_cgroup_version == 2)
+		return getCGroupMemoryLimit(val, CGROUP2_MEMORY_LIMIT_FILENAME);
+	else {
+		g_assert(!"Unknown cgroup version.");
+		return FALSE;
+	}
+}
+
+/**
+ * @getPhysicalMemoryUsage
+ *
+ * @param[out] val - pointer to the result area
+ * @returns True or False depending if a usage value was found
+ *
+ * Interrogate the cgroup memory values to determine how much memory is in use.
+ * Does not call initialize() itself: until that has run s_cgroup_version is 0 and FALSE is returned.
+ */
+static gboolean
+getPhysicalMemoryUsage(size_t *val)
+{
+	if (s_cgroup_version == 0)
+		return FALSE;
+	else if (s_cgroup_version == 1)
+		return getCGroupMemoryUsage(val); // same reader for v1 and v2; the keys were chosen in initialize()
+	else if (s_cgroup_version == 2)
+		return getCGroupMemoryUsage(val);
+	else {
+		g_assert(!"Unknown cgroup version.");
+		return FALSE;
+	}
+}
+
+/**
+ * @findCGroupVersion
+ *
+ * @returns 1 or 2 for the cgroup version, 0 when /sys/fs/cgroup cannot be queried or has another type
+ *
+ * Inspect the /sys/fs/cgroup hierarchy to determine what version of
+ * cgroup we are using
+ */
+static int
+findCGroupVersion()
+{
+	// It is possible to have both cgroup v1 and v2 enabled on a system.
+	// Most non-bleeding-edge Linux distributions fall in this group. We
+	// look at the file system type of /sys/fs/cgroup to determine which
+	// one is the default. For more details, see:
+	// https://systemd.io/CGROUP_DELEGATION/#three-different-tree-setups-
+	// We don't care about the difference between the "legacy" and "hybrid"
+	// modes because both of those involve cgroup v1 controllers managing
+	// resources.
+
+
+	struct statfs stats;
+	int result = statfs("/sys/fs/cgroup", &stats);
+	if (result != 0)
+		return 0;
+
+	switch (stats.f_type) {
+		case TMPFS_MAGIC: return 1; // a tmpfs at /sys/fs/cgroup is reported as cgroup v1
+		case CGROUP2_SUPER_MAGIC: return 2; // a cgroup2 file system is reported as cgroup v2
+		default:
+			g_assert(!"Unexpected file system type for /sys/fs/cgroup");
+			return 0;
+	}
+}
+
+/**
+ * @isCGroup1MemorySubsystem
+ *
+ * @param[in] strTok - Token for comparison
+ * @returns True if token matches "memory"
+ *
+ * Predicate passed to findCGroupPath() by initialize() when cgroup v1 is in use
+ */
+static gboolean
+isCGroup1MemorySubsystem(const char *strTok)
+{
+	return strcmp("memory", strTok) == 0;
+}
+
+/**
+ * @findCGroupPath
+ *
+ * @param[in] is_subsystem - Function used to compare tokens (NULL is passed for cgroup v2)
+ * @returns malloc'ed path (hierarchy mount point plus relative cgroup path), or NULL on failure
+ *
+ * Combine the results of findHierarchyMount() and findCGroupPathForSubsystem() into one path
+ */
+static char *
+findCGroupPath(gboolean (*is_subsystem)(const char *))
+{
+	char *cgroup_path = NULL;
+	char *hierarchy_mount = NULL;
+	char *hierarchy_root = NULL;
+	char *cgroup_path_relative_to_mount = NULL;
+	size_t common_path_prefix_len;
+
+	findHierarchyMount(is_subsystem, &hierarchy_mount, &hierarchy_root);
+	if (hierarchy_mount != NULL && hierarchy_root != NULL) {
+
+		cgroup_path_relative_to_mount = findCGroupPathForSubsystem(is_subsystem);
+		if (cgroup_path_relative_to_mount != NULL) {
+			// Sized for both strings plus the terminator; at most this much is copied below.
+			cgroup_path = (char*)malloc(strlen(hierarchy_mount) + strlen(cgroup_path_relative_to_mount) + 1);
+			if (cgroup_path != NULL) {
+
+				strcpy(cgroup_path, hierarchy_mount);
+				// For a host cgroup, we need to append the relative path.
+				// The root and cgroup path can share a common prefix of the path that should not be appended.
+				// Example 1 (docker):
+				// hierarchy_mount:               /sys/fs/cgroup/cpu
+				// hierarchy_root:                /docker/87ee2de57e51bc75175a4d2e81b71d162811b179d549d6601ed70b58cad83578
+				// cgroup_path_relative_to_mount: /docker/87ee2de57e51bc75175a4d2e81b71d162811b179d549d6601ed70b58cad83578/my_named_cgroup
+				// append to the cgroup_path:     /my_named_cgroup
+				// final cgroup_path:             /sys/fs/cgroup/cpu/my_named_cgroup
+				//
+				// Example 2 (out of docker)
+				// hierarchy_mount:               /sys/fs/cgroup/cpu
+				// hierarchy_root:                /
+				// cgroup_path_relative_to_mount: /my_named_cgroup
+				// append to the cgroup_path:     /my_named_cgroup
+				// final cgroup_path:             /sys/fs/cgroup/cpu/my_named_cgroup
+				common_path_prefix_len = strlen(hierarchy_root);
+				if ((common_path_prefix_len == 1) ||
+					(strncmp(hierarchy_root, cgroup_path_relative_to_mount, common_path_prefix_len) != 0))
+					common_path_prefix_len = 0; // root is "/" or not a prefix: append the whole relative path
+
+				g_assert((cgroup_path_relative_to_mount[common_path_prefix_len] == '/') ||
+					(cgroup_path_relative_to_mount[common_path_prefix_len] == '\0'));
+
+				strcat(cgroup_path, cgroup_path_relative_to_mount + common_path_prefix_len);
+			}
+		}
+	}
+
+	free(hierarchy_mount);
+	free(hierarchy_root);
+	free(cgroup_path_relative_to_mount);
+	return cgroup_path;
+}
+
+/**
+ * @findHierarchyMount
+ *
+ * @param[in] is_subsystem - Comparison function applied to each mount option; NULL accepts any cgroup mount
+ * @param[out] pmountpath - receives a malloc'ed copy of the 5th mountinfo field (mount point); left untouched if no match
+ * @param[out] pmountroot - receives a malloc'ed copy of the 4th mountinfo field (root); left untouched if no match
+ *
+ * Check the /proc filesystem to determine the root and mount path of /sys/fs/cgroup data
+ */
+static void
+findHierarchyMount(gboolean (*is_subsystem)(const char *), char** pmountpath, char** pmountroot)
+{
+	char *line = NULL;
+	size_t lineLen = 0, maxLineLen = 0;
+	char *filesystemType = NULL;
+	char *options = NULL;
+	char *mountpath = NULL;
+	char *mountroot = NULL;
+
+	FILE *mountinfofile = fopen(PROC_MOUNTINFO_FILENAME, "r");
+	if (mountinfofile == NULL)
+		goto done;
+
+	while (getline(&line, &lineLen, mountinfofile) != -1) {
+		if (filesystemType == NULL || lineLen > maxLineLen) { // (re)allocate scratch buffers when getline grew its buffer
+			free(filesystemType);
+			filesystemType = NULL;
+			free(options);
+			options = NULL;
+			filesystemType = (char*)malloc(lineLen+1);
+			if (filesystemType == NULL)
+				goto done;
+			options = (char*)malloc(lineLen+1);
+			if (options == NULL)
+				goto done;
+			maxLineLen = lineLen;
+		}
+
+		char *separatorChar = strstr(line, " - ");
+		// A line without the " - " separator is handled as a parse failure instead of passing NULL to sscanf.
+		// See man page of proc to get format for /proc/self/mountinfo file
+		int sscanfRet = (separatorChar == NULL) ? 0 : sscanf(separatorChar,
+					" - %s %*s %s",
+					filesystemType,
+					options);
+		if (sscanfRet != 2) {
+			g_assert(!"Failed to parse mount info file contents with sscanf.");
+			goto done;
+		}
+
+		if (strncmp(filesystemType, "cgroup", 6) == 0) { // prefix match: any type starting with "cgroup"
+			gboolean isSubsystemMatch = is_subsystem == NULL; // no predicate: every cgroup mount matches
+			if (!isSubsystemMatch) {
+				char *context = NULL;
+				char *strTok = strtok_r(options, ",", &context);
+				while (!isSubsystemMatch && strTok != NULL)
+				{
+					isSubsystemMatch = is_subsystem(strTok);
+					strTok = strtok_r(NULL, ",", &context);
+				}
+			}
+			if (isSubsystemMatch) {
+				mountpath = (char*)malloc(lineLen+1);
+				if (mountpath == NULL)
+					goto done;
+				mountroot = (char*)malloc(lineLen+1);
+				if (mountroot == NULL)
+					goto done;
+				// Skip the first three fields; the 4th is the root, the 5th the mount point.
+				sscanfRet = sscanf(line,
+						"%*s %*s %*s %s %s ",
+						mountroot,
+						mountpath);
+				if (sscanfRet != 2)
+					g_assert(!"Failed to parse mount info file contents with sscanf.");
+
+				// assign the output arguments and clear the locals so we don't free them.
+				*pmountpath = mountpath; // NOTE(review): the loop keeps scanning, so a later match overwrites these without freeing the earlier buffers
+				*pmountroot = mountroot;
+				mountpath = mountroot = NULL;
+			}
+		}
+	}
+done:
+	free(mountpath);
+	free(mountroot);
+	free(filesystemType);
+	free(options);
+	free(line);
+	if (mountinfofile)
+		fclose(mountinfofile);
+}
+
+/**
+ * @findCGroupPathForSubsystem
+ *
+ * @param[in] is_subsystem - Comparison function applied to each entry of a v1 subsystem list (not used for v2)
+ * @returns malloc'ed cgroup path taken from the matching line, or NULL if none was found
+ *
+ * Scan /proc/self/cgroup for the matching subsystem line (v1) or the "0::" line (v2)
+ */
+static char *
+findCGroupPathForSubsystem(gboolean (*is_subsystem)(const char *))
+{
+	char *line = NULL;
+	size_t lineLen = 0;
+	size_t maxLineLen = 0;
+	char *subsystem_list = NULL;
+	char *cgroup_path = NULL;
+	gboolean result = FALSE;
+
+	FILE *cgroupfile = fopen(PROC_CGROUP_FILENAME, "r");
+	if (cgroupfile == NULL)
+		goto done;
+
+	while (!result && getline(&line, &lineLen, cgroupfile) != -1) {
+		if (subsystem_list == NULL || lineLen > maxLineLen) { // (re)allocate scratch buffers when getline grew its buffer
+			free(subsystem_list);
+			subsystem_list = NULL;
+			free(cgroup_path);
+			cgroup_path = NULL;
+			subsystem_list = (char*)malloc(lineLen+1);
+			if (subsystem_list == NULL)
+				goto done;
+			cgroup_path = (char*)malloc(lineLen+1);
+			if (cgroup_path == NULL)
+				goto done;
+			maxLineLen = lineLen;
+		}
+
+		if (s_cgroup_version == 1) {
+			// See man page of proc to get format for /proc/self/cgroup file
+			int sscanfRet = sscanf(line,
+						"%*[^:]:%[^:]:%s",
+						subsystem_list,
+						cgroup_path);
+			if (sscanfRet != 2) {
+				g_assert(!"Failed to parse cgroup info file contents with sscanf.");
+				goto done;
+			}
+			// The second field is a comma separated list; stop at the first entry the predicate accepts.
+			char* context = NULL;
+			char* strTok = strtok_r(subsystem_list, ",", &context);
+			while (strTok != NULL) {
+				if (is_subsystem(strTok)) {
+					result = TRUE;
+					break;
+				}
+				strTok = strtok_r(NULL, ",", &context);
+			}
+		} else if (s_cgroup_version == 2) {
+			// See https://www.kernel.org/doc/Documentation/cgroup-v2.txt
+			// Look for a "0::/some/path"
+			int sscanfRet = sscanf(line,
+						"0::%s",
+						cgroup_path);
+			if (sscanfRet == 1)
+			{
+				result = TRUE;
+			}
+		} 
else {
+			g_assert(!"Unknown cgroup version in mountinfo.");
+			goto done;
+		}
+	}
+done:
+	free(subsystem_list);
+	if (!result) { // no match: the scratch buffer must not be handed to the caller
+		free(cgroup_path);
+		cgroup_path = NULL;
+	}
+	free(line);
+	if (cgroupfile)
+		fclose(cgroupfile);
+	return cgroup_path;
+}
+
+/**
+ * @getCGroupMemoryLimit
+ *
+ * @param[out] val - Memory limit
+ * @param[in] filename - name of file (appended to s_memory_cgroup_path) from which to extract limit
+ * @returns True if value found; False if there is no cgroup path, the name cannot be built or the read fails
+ *
+ * Extract memory limit from specified /sys/fs/cgroup/memory file
+ */
+static gboolean
+getCGroupMemoryLimit(guint64 *val, const char *filename)
+{
+	if (s_memory_cgroup_path == NULL)
+		return FALSE;
+
+	char* mem_limit_filename = NULL;
+	if (asprintf(&mem_limit_filename, "%s%s", s_memory_cgroup_path, filename) < 0)
+		return FALSE;
+
+	gboolean result = readMemoryValueFromFile(mem_limit_filename, val);
+	free(mem_limit_filename);
+	return result;
+}
+
+/**
+ * @getCGroupMemoryUsage
+ *
+ * @param[out] val - Memory usage: sum of the memory.stat counters named in s_mem_stat_key_names
+ * @returns True only if a value was parsed for every one of the s_mem_stat_n_keys keys
+ *
+ * Extract memory usage from /sys/fs/cgroup/memory.stat file
+ */
+static gboolean
+getCGroupMemoryUsage(size_t *val)
+{
+	if (s_memory_cgroup_path == NULL)
+		return FALSE;
+
+	char *stat_filename = NULL;
+	if (asprintf(&stat_filename, "%s%s", s_memory_cgroup_path, CGROUP_MEMORY_STAT_FILENAME) < 0)
+		return FALSE;
+
+	FILE *stat_file = fopen(stat_filename, "r");
+	free(stat_filename);
+	if (stat_file == NULL)
+		return FALSE;
+
+	char *line = NULL;
+	size_t lineLen = 0;
+	size_t readValues = 0;
+	char *endptr;
+
+	*val = 0;
+	while (getline(&line, &lineLen, stat_file) != -1 && readValues < s_mem_stat_n_keys) {
+		for (size_t i = 0; i < s_mem_stat_n_keys; i++) {
+			if (strncmp(line, s_mem_stat_key_names[i], s_mem_stat_key_lengths[i]) == 0) { // keys end in a space, so this matches whole names
+				errno = 0;
+				const char *startptr = line + s_mem_stat_key_lengths[i];
+				*val += strtoll(startptr, &endptr, 10);
+				if (endptr != startptr && errno == 0)
+					readValues++;
+
+				break;
+			}
+		}
+	}
+
+	fclose(stat_file);
+	free(line);
+
+	if (readValues == 
s_mem_stat_n_keys)
+		return TRUE;
+
+	return FALSE;
+}
+
+/**
+ * @getRestrictedPhysicalMemoryLimit
+ *
+ * @returns Physical memory limit
+ *
+ * Determine if there are any limits on memory and return the value
+ * if so. Zero represents no limit.
+ */
+size_t
+getRestrictedPhysicalMemoryLimit()
+{
+	guint64 physical_memory_limit = 0;
+
+	if (s_mem_stat_n_keys == 0) // still 0 means initialize() has not run yet
+		initialize();
+
+	if (!getPhysicalMemoryLimit(&physical_memory_limit))
+		return 0;
+
+	// If there's no memory limit specified on the container this
+	// actually returns 0x7FFFFFFFFFFFF000 (2^63-1 rounded down to
+	// 4k which is a common page size). So we know we are not
+	// running in a memory restricted environment.
+	if (physical_memory_limit > 0x7FFFFFFF00000000)
+		return 0;
+
+	return (getPhysicalMemoryTotal(physical_memory_limit));
+}
+
+/**
+ * @getPhysicalMemoryTotal
+ *
+ * @param[in] physical_memory_limit - Upper bound to start from (64-bit, matching the prototype and the cgroup value)
+ * @returns The smallest of the input limit, the RLIMIT_AS soft limit and physical RAM, capped at ULONG_MAX
+ *
+ * Check the input limit against any system limits or actual memory on system
+ */
+static size_t
+getPhysicalMemoryTotal(guint64 physical_memory_limit)
+{
+	struct rlimit curr_rlimit;
+	size_t rlimit_soft_limit = (size_t)RLIM_INFINITY;
+	if (getrlimit(RLIMIT_AS, &curr_rlimit) == 0)
+		rlimit_soft_limit = curr_rlimit.rlim_cur;
+	physical_memory_limit = (physical_memory_limit < rlimit_soft_limit) ?
+				physical_memory_limit : rlimit_soft_limit;
+
+	// Ensure that limit is not greater than real memory size
+	long pages = sysconf(_SC_PHYS_PAGES);
+	if (pages != -1) {
+		if (pageSize != -1) {
+			physical_memory_limit = (physical_memory_limit < (size_t)pages * pageSize) ?
+				physical_memory_limit : (size_t)pages * pageSize;
+		}
+	}
+
+	if (physical_memory_limit > ULONG_MAX) {
+		// It is observed in practice when the memory is unrestricted, Linux control
+		// group returns a physical limit that is bigger than the address space
+		return ULONG_MAX;
+	} else
+		return (size_t)physical_memory_limit;
+}
+
+/**
+ * @getPhysicalMemoryUsed
+ *
+ * @param[out] val - pointer to the memory usage value (in bytes when taken from /proc/self/statm)
+ * @returns True if we are able to determine usage; False if val is NULL or no source could be parsed
+ *
+ * Determine the amount of memory in use: cgroup usage first, then the second field of /proc/self/statm
+ */
+gboolean
+getPhysicalMemoryUsed(size_t *val)
+{
+	gboolean result = FALSE;
+	size_t linelen = 0;
+	char *line = NULL;
+
+	if (val == NULL)
+		return FALSE;
+
+	// Linux uses cgroup usage to trigger oom kills.
+	if (getPhysicalMemoryUsage(val))
+		return TRUE;
+
+	// process resident set size.
+	FILE* file = fopen(PROC_STATM_FILENAME, "r");
+	if (file != NULL && getline(&line, &linelen, file) != -1) {
+		char* context = NULL;
+		char* strTok = strtok_r(line, " ", &context);
+		strTok = strtok_r(NULL, " ", &context); // second field; NULL when the line has fewer than two fields
+		// Never hand a NULL token to strtoull; a missing field is reported as failure.
+		errno = 0;
+		*val = (strTok != NULL) ? strtoull(strTok, NULL, 0) : 0;
+		if (strTok != NULL && errno == 0) {
+			if (pageSize != -1) {
+				*val = *val * pageSize; // statm counts pages; convert to bytes
+				result = TRUE;
+			}
+		}
+	}
+
+	if (file)
+		fclose(file);
+	free(line);
+	return result;
+}
+
+/**
+ * @getPhysicalMemoryAvail
+ *
+ * @returns Amount of memory available
+ *
+ * Determine the amount of memory available by examining any limits and
+ * checking what memory is in use.
+ */
+size_t
+getPhysicalMemoryAvail()
+{
+	size_t max, used, avail, sysAvail;
+
+	max = getRestrictedPhysicalMemoryLimit();
+
+	if (max == 0) // 0 means "no restriction": fall back to rlimit / physical RAM
+		max = getPhysicalMemoryTotal(ULONG_MAX);
+
+	if (getPhysicalMemoryUsed(&used))
+		avail = (used < max) ? max - used : 0; // clamp: an unsigned subtraction would wrap when usage exceeds the limit
+	else
+		avail = max;
+
+	sysAvail = sysconf(_SC_AVPHYS_PAGES) * pageSize;
+	return (avail < sysAvail ? 
avail : sysAvail); +} Index: a/src/mono/mono/utils/CMakeLists.txt =================================================================== --- a/src/mono/mono/utils/CMakeLists.txt +++ b/src/mono/mono/utils/CMakeLists.txt @@ -32,6 +32,7 @@ set(utils_common_sources mono-sha1.c mono-logger.c mono-logger-internals.h + mono-cgroup.c mono-codeman.c mono-counters.c mono-compiler.h