From 09152fda3e7ab509f1c7b6c8d27157e8b5dde5e9 Mon Sep 17 00:00:00 2001 From: Angus Salkeld Date: Mon, 6 Feb 2012 22:40:21 +1100 Subject: [PATCH] Fix #787196 Signed-off-by: Angus Salkeld --- 0001-LOOP-fix-the-todo-calculations.patch | 98 ++++++++++++++ ...-for-a-single-job-causing-a-cpu-spin.patch | 127 ++++++++++++++++++ ...ent-jobs-from-consuming-too-much-cpu.patch | 58 ++++++++ libqb.spec | 14 +- 4 files changed, 294 insertions(+), 3 deletions(-) create mode 100644 0001-LOOP-fix-the-todo-calculations.patch create mode 100644 0002-TEST-check-for-a-single-job-causing-a-cpu-spin.patch create mode 100644 0003-LOOP-prevent-jobs-from-consuming-too-much-cpu.patch diff --git a/0001-LOOP-fix-the-todo-calculations.patch b/0001-LOOP-fix-the-todo-calculations.patch new file mode 100644 index 0000000..140cbc0 --- /dev/null +++ b/0001-LOOP-fix-the-todo-calculations.patch @@ -0,0 +1,98 @@ +From 1f269e7bfc916f2ae852e8a7faa591af74c59f05 Mon Sep 17 00:00:00 2001 +From: Angus Salkeld +Date: Mon, 6 Feb 2012 22:31:55 +1100 +Subject: [PATCH 1/3] LOOP: fix the todo calculations. + +Signed-off-by: Angus Salkeld +--- + lib/loop.c | 22 ++++++++++++---------- + lib/loop_job.c | 5 ++++- + 2 files changed, 16 insertions(+), 11 deletions(-) + +diff --git a/lib/loop.c b/lib/loop.c +index 142e7eb..45ceb3f 100644 +--- a/lib/loop.c ++++ b/lib/loop.c +@@ -26,7 +26,7 @@ + #include "loop_int.h" + #include "util_int.h" + +-static int32_t ++static void + qb_loop_run_level(struct qb_loop_level *level) + { + struct qb_loop_item *job; +@@ -44,13 +44,12 @@ Ill_have_another: + level->todo--; + processed++; + if (level->l->stop_requested) { +- return processed; ++ return; + } + if (processed < level->to_process) { + goto Ill_have_another; + } + } +- return processed; + } + + void +@@ -139,20 +138,23 @@ qb_loop_run(struct qb_loop *l) + if (todo > 0) { + ms_timeout = 0; + } else { +- todo = 0; + if (l->timer_source) { + ms_timeout = qb_loop_timer_msec_duration_to_expire(l->timer_source); + } else { + ms_timeout = -1; + } + } +- todo += l->fd_source->poll(l->fd_source, ms_timeout); +- +- for (p = QB_LOOP_HIGH; p >= p_stop; p--) { +- todo -= qb_loop_run_level(&l->level[p]); +- if (l->stop_requested) { +- return; ++ (void)l->fd_source->poll(l->fd_source, ms_timeout); ++ ++ todo = 0; ++ for (p = QB_LOOP_HIGH; p >= QB_LOOP_LOW; p--) { ++ if (p >= p_stop) { ++ qb_loop_run_level(&l->level[p]); ++ if (l->stop_requested) { ++ return; ++ } + } ++ todo += l->level[p].todo; + } + } while (!l->stop_requested); + } +diff --git a/lib/loop_job.c b/lib/loop_job.c +index c17663d..123d1af 100644 +--- a/lib/loop_job.c ++++ b/lib/loop_job.c +@@ -48,16 +48,19 @@ get_more_jobs(struct qb_loop_source *s, int32_t ms_timeout) + { + int32_t p; + int32_t new_jobs = 0; ++ int32_t level_jobs = 0; + + /* + * this is simple, move jobs from wait_head to job_head + */ + for (p = QB_LOOP_LOW; p <= QB_LOOP_HIGH; p++) { + if (!qb_list_empty(&s->l->level[p].wait_head)) { +- new_jobs += qb_list_length(&s->l->level[p].wait_head); ++ level_jobs = qb_list_length(&s->l->level[p].wait_head); ++ new_jobs += level_jobs; + qb_list_splice(&s->l->level[p].wait_head, + &s->l->level[p].job_head); + qb_list_init(&s->l->level[p].wait_head); ++ s->l->level[p].todo += level_jobs; + } + } + return new_jobs; +-- +1.7.7.6 + diff --git a/0002-TEST-check-for-a-single-job-causing-a-cpu-spin.patch b/0002-TEST-check-for-a-single-job-causing-a-cpu-spin.patch new file mode 100644 index 0000000..28c7092 --- /dev/null +++ b/0002-TEST-check-for-a-single-job-causing-a-cpu-spin.patch @@ -0,0 +1,127 @@ +From 7d2739fdd23e118bbaa42b3c43381b88ea842c88 Mon Sep 17 00:00:00 2001 +From: Angus Salkeld +Date: Sat, 4 Feb 2012 12:28:45 +1100 +Subject: [PATCH 2/3] TEST: check for a single job causing a cpu spin + +Signed-off-by: Angus Salkeld +--- + tests/check_loop.c | 63 ++++++++++++++++++++++++++++++++++++++++++++++++++- + 1 files changed, 61 insertions(+), 2 deletions(-) + +diff --git a/tests/check_loop.c b/tests/check_loop.c +index 50e80da..eb79a51 100644 +--- a/tests/check_loop.c ++++ b/tests/check_loop.c +@@ -60,6 +60,7 @@ static void job_1_r(void *data) + res = qb_loop_job_add(l, QB_LOOP_MED, data, job_2); + ck_assert_int_eq(res, 0); + } ++ + static void job_1_add_nuts(void *data) + { + int32_t res; +@@ -153,7 +154,6 @@ START_TEST(test_loop_job_4) + } + END_TEST + +- + START_TEST(test_loop_job_nuts) + { + int32_t res; +@@ -169,6 +169,53 @@ START_TEST(test_loop_job_nuts) + } + END_TEST + ++static qb_util_stopwatch_t *rl_sw; ++#define RATE_LIMIT_RUNTIME_SEC 3 ++ ++static void job_add_self(void *data) ++{ ++ int32_t res; ++ uint64_t elapsed1; ++ qb_loop_t *l = (qb_loop_t *)data; ++ ++ job_1_run_count++; ++ qb_util_stopwatch_stop(rl_sw); ++ elapsed1 = qb_util_stopwatch_us_elapsed_get(rl_sw); ++ if (elapsed1 > (RATE_LIMIT_RUNTIME_SEC * QB_TIME_US_IN_SEC)) { ++ /* run for 3 seconds */ ++ qb_loop_stop(l); ++ return; ++ } ++ res = qb_loop_job_add(l, QB_LOOP_MED, data, job_add_self); ++ ck_assert_int_eq(res, 0); ++} ++ ++START_TEST(test_job_rate_limit) ++{ ++ int32_t res; ++ qb_loop_t *l = qb_loop_create(); ++ fail_if(l == NULL); ++ ++ rl_sw = qb_util_stopwatch_create(); ++ fail_if(rl_sw == NULL); ++ ++ qb_util_stopwatch_start(rl_sw); ++ ++ res = qb_loop_job_add(l, QB_LOOP_MED, l, job_add_self); ++ ck_assert_int_eq(res, 0); ++ ++ qb_loop_run(l); ++ /* ++ * the test is to confirm that a single job does not run away ++ * and cause cpu spin. We are going to say that a spin is more than ++ * one job per 50ms if there is only one job pending in the loop. ++ */ ++ _ck_assert_int(job_1_run_count, <, (RATE_LIMIT_RUNTIME_SEC * (QB_TIME_MS_IN_SEC/50)) + 1); ++ qb_loop_destroy(l); ++ qb_util_stopwatch_free(rl_sw); ++} ++END_TEST ++ + static Suite *loop_job_suite(void) + { + TCase *tc; +@@ -188,6 +235,12 @@ static Suite *loop_job_suite(void) + + tc = tcase_create("run_500"); + tcase_add_test(tc, test_loop_job_nuts); ++ tcase_set_timeout(tc, 5); ++ suite_add_tcase(s, tc); ++ ++ tc = tcase_create("rate_limit"); ++ tcase_add_test(tc, test_job_rate_limit); ++ tcase_set_timeout(tc, 5); + suite_add_tcase(s, tc); + + return s; +@@ -465,12 +518,17 @@ static Suite *loop_timer_suite(void) + tcase_add_test(tc, test_loop_timer_expire_leak); + tcase_set_timeout(tc, 30); + suite_add_tcase(s, tc); ++ return s; ++} + ++static Suite *loop_signal_suite(void) ++{ ++ TCase *tc; ++ Suite *s = suite_create("loop_signal_suite"); + tc = tcase_create("signals"); + tcase_add_test(tc, test_loop_sig_handling); + tcase_set_timeout(tc, 10); + suite_add_tcase(s, tc); +- + return s; + } + +@@ -479,6 +537,7 @@ int32_t main(void) + int32_t number_failed; + SRunner *sr = srunner_create(loop_job_suite()); + srunner_add_suite (sr, loop_timer_suite()); ++ srunner_add_suite (sr, loop_signal_suite()); + + qb_log_init("check", LOG_USER, LOG_EMERG); + qb_log_ctl(QB_LOG_SYSLOG, QB_LOG_CONF_ENABLED, QB_FALSE); +-- +1.7.7.6 + diff --git a/0003-LOOP-prevent-jobs-from-consuming-too-much-cpu.patch b/0003-LOOP-prevent-jobs-from-consuming-too-much-cpu.patch new file mode 100644 index 0000000..b1fa851 --- /dev/null +++ b/0003-LOOP-prevent-jobs-from-consuming-too-much-cpu.patch @@ -0,0 +1,58 @@ +From cf198dc03e51439da9e824abb7acc29c30e691ae Mon Sep 17 00:00:00 2001 +From: Angus Salkeld +Date: Mon, 6 Feb 2012 22:35:30 +1100 +Subject: [PATCH 3/3] LOOP: prevent jobs from consuming too much cpu. + +Signed-off-by: Angus Salkeld +--- + lib/loop.c | 23 ++++++++++++++++++++--- + 1 files changed, 20 insertions(+), 3 deletions(-) + +diff --git a/lib/loop.c b/lib/loop.c +index 45ceb3f..3e10b89 100644 +--- a/lib/loop.c ++++ b/lib/loop.c +@@ -118,6 +118,8 @@ qb_loop_run(struct qb_loop *l) + int32_t p; + int32_t p_stop = QB_LOOP_LOW; + int32_t todo = 0; ++ int32_t job_todo; ++ int32_t timer_todo; + int32_t ms_timeout; + + l->stop_requested = QB_FALSE; +@@ -130,13 +132,28 @@ qb_loop_run(struct qb_loop *l) + } + + if (l->job_source && l->job_source->poll) { +- todo += l->job_source->poll(l->job_source, 0); ++ job_todo = l->job_source->poll(l->job_source, 0); ++ } else { ++ job_todo = 0; + } + if (l->timer_source && l->timer_source->poll) { +- todo += l->timer_source->poll(l->timer_source, 0); ++ timer_todo = l->timer_source->poll(l->timer_source, 0); ++ } else { ++ timer_todo = 0; + } +- if (todo > 0) { ++ if (todo > 0 || timer_todo > 0) { ++ /* ++ * if there are old todos or timer todos then don't wait. ++ */ + ms_timeout = 0; ++ } else if (job_todo > 0) { ++ todo = 0; ++ /* ++ * if we only have jobs to do (not timers or old todos) ++ * then set a non-zero timeout. Jobs can spin out of ++ * control if someone keeps adding them. ++ */ ++ ms_timeout = 50; + } else { + if (l->timer_source) { + ms_timeout = qb_loop_timer_msec_duration_to_expire(l->timer_source); +-- +1.7.7.6 + diff --git a/libqb.spec b/libqb.spec index f648f29..d353e0a 100644 --- a/libqb.spec +++ b/libqb.spec @@ -1,6 +1,6 @@ Name: libqb Version: 0.9.0 -Release: 1%{?dist} +Release: 2%{?dist} Summary: An IPC library for high performance servers Group: System Environment/Libraries @@ -9,7 +9,9 @@ URL: http://www.libqb.org Source0: https://fedorahosted.org/releases/q/u/quarterback/%{name}-%{version}.tar.xz BuildRoot: %{_tmppath}/%{name}-%{version}-%{release}-root-%(%{__id_u} -n) -#Patch1: +Patch1: 0001-LOOP-fix-the-todo-calculations.patch +Patch2: 0002-TEST-check-for-a-single-job-causing-a-cpu-spin.patch +Patch3: 0003-LOOP-prevent-jobs-from-consuming-too-much-cpu.patch BuildRequires: libtool doxygen procps check-devel @@ -22,7 +24,9 @@ Initially these are IPC and poll. %prep %setup -q -#%patch1 -p1 +%patch1 -p1 +%patch2 -p1 +%patch3 -p1 %build %configure --disable-static @@ -67,6 +71,10 @@ developing applications that use %{name}. %{_mandir}/man3/qb*3* %changelog +* Mon Feb 06 2012 Angus Salkeld - 0.9.0-2 +- Fix a spin in the mainloop when a timer or poll gets removed + When in the job queue (#787196). + * Fri Jan 27 2012 Angus Salkeld - 0.9.0-1 - Rebased to 0.9.0