From 072aa720ff84eaf2f2826c1d9875bede30be47cc Mon Sep 17 00:00:00 2001 From: "Brian C. Lane" Date: Wed, 7 Feb 2018 16:33:33 -0800 Subject: [PATCH] Add /compose/cancel API to cancel a running build If the build hasn't started yet (state is WAITING) try removing the symlink to it. If this succeeds, delete the partial results directory. If the build makes it to RUNNING then it writes a CANCEL file in the results directory. The callback that is passed to execWithRedirect catches this, causing a SIGTERM to be sent to anaconda. It then exits and cleanup happens normally. The partial results directory is then removed. --- src/pylorax/api/queue.py | 78 +++++++++++++++++++++++++++++++++++----- src/pylorax/api/v0.py | 30 +++++++++++++++- 2 files changed, 99 insertions(+), 9 deletions(-) diff --git a/src/pylorax/api/queue.py b/src/pylorax/api/queue.py index 8d4113e1..1a7e2958 100644 --- a/src/pylorax/api/queue.py +++ b/src/pylorax/api/queue.py @@ -71,7 +71,12 @@ def monitor(cfg, cancel_q): else: src = joinpaths(cfg.composer_dir, "queue/new", jobs[0]) dst = joinpaths(cfg.composer_dir, "queue/run", jobs[0]) - os.rename(src, dst) + try: + os.rename(src, dst) + except OSError: + # The symlink may vanish if uuid_cancel() has been called + continue + log.info("Starting new compose: %s", dst) open(joinpaths(dst, "STATUS"), "w").write("RUNNING\n") @@ -114,14 +119,20 @@ def make_compose(cfg, results_dir): for f in ["/tmp/NOSAVE_INPUT_KS", "/tmp/NOSAVE_LOGS"]: open(f, "w") - log.debug("repo_url = %s, cfg = %s", repo_url, install_cfg) - novirt_install(install_cfg, joinpaths(results_dir, install_cfg.image_name), None, repo_url) + # Placing a CANCEL file in the results directory will make execWithRedirect send anaconda a SIGTERM + def cancel_build(): + return os.path.exists(joinpaths(results_dir, "CANCEL")) - # Make sure that everything under the results directory is owned by the user - user = pwd.getpwuid(cfg.uid).pw_name - group = grp.getgrgid(cfg.gid).gr_name - log.debug("Install 
finished, chowning results to %s:%s", user, group) - subprocess.call(["chown", "-R", "%s:%s" % (user, group), results_dir]) + log.debug("repo_url = %s, cfg = %s", repo_url, install_cfg) + try: + novirt_install(install_cfg, joinpaths(results_dir, install_cfg.image_name), None, repo_url, + callback_func=cancel_build) + finally: + # Make sure that everything under the results directory is owned by the user + user = pwd.getpwuid(cfg.uid).pw_name + group = grp.getgrgid(cfg.gid).gr_name + log.debug("Install finished, chowning results to %s:%s", user, group) + subprocess.call(["chown", "-R", "%s:%s" % (user, group), results_dir]) def get_compose_type(results_dir): """ Return the type of composition. @@ -212,6 +223,57 @@ def build_status(cfg, status_filter=None): results.append(compose_detail(build)) return results +def uuid_cancel(cfg, uuid): + """Cancel a build and delete its results + + :param cfg: Configuration settings + :type cfg: ComposerConfig + :param uuid: The UUID of the build + :type uuid: str + :returns: True if it was canceled and deleted + :rtype: bool + + Only call this if the build status is WAITING or RUNNING + """ + # This status can change (and probably will) while it is in the middle of doing this: + # It can move from WAITING -> RUNNING or it can move from RUNNING -> FINISHED|FAILED + + # If it is in WAITING remove the symlink and then check to make sure it didn't show up + # in RUNNING + queue_dir = joinpaths(cfg.get("composer", "lib_dir"), "queue") + uuid_new = joinpaths(queue_dir, "new", uuid) + if os.path.exists(uuid_new): + try: + os.unlink(uuid_new) + except OSError: + # The symlink may vanish if the queue monitor started the build + pass + uuid_run = joinpaths(queue_dir, "run", uuid) + if not os.path.exists(uuid_run): + # Successfully removed it before the build started + return uuid_delete(cfg, uuid) + + # Tell the build to stop running + cancel_path = joinpaths(cfg.get("composer", "lib_dir"), "results", uuid, "CANCEL") + open(cancel_path, 
"w").write("\n") + + # Wait for status to move to FAILED + started = time.time() + while True: + status = uuid_status(cfg, uuid) + if status["queue_status"] == "FAILED": + break + + # Is this taking too long? Exit anyway and try to cleanup. + if time.time() > started + (10 * 60): + log.error("Failed to cancel the build of %s", uuid) + break + + time.sleep(5) + + # Remove the partial results + uuid_delete(cfg, uuid) + def uuid_delete(cfg, uuid): """Delete all of the results from a compose diff --git a/src/pylorax/api/v0.py b/src/pylorax/api/v0.py index 16f95f6b..0e6ea372 100644 --- a/src/pylorax/api/v0.py +++ b/src/pylorax/api/v0.py @@ -719,6 +719,19 @@ POST `/api/v0/recipes/tag/` ] } +DELETE `/api/v0/compose/cancel/<uuid>` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + + Cancel the build, if it is not finished, and delete the results. It will return a + status of True if it is successful. + + Example:: + + { + "status": true, + "uuid": "03397f8d-acff-4cdb-bd31-f629b7a948f5" + } + DELETE `/api/v0/compose/delete/` ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -832,7 +845,7 @@ from pylorax.api.crossdomain import crossdomain from pylorax.api.projects import projects_list, projects_info, projects_depsolve from pylorax.api.projects import modules_list, modules_info, ProjectsError from pylorax.api.queue import queue_status, build_status, uuid_delete, uuid_status, uuid_info -from pylorax.api.queue import uuid_tar, uuid_image +from pylorax.api.queue import uuid_tar, uuid_image, uuid_cancel from pylorax.api.recipes import list_branch_files, read_recipe_commit, recipe_filename, list_commits from pylorax.api.recipes import recipe_from_dict, recipe_from_toml, commit_recipe, delete_recipe, revert_recipe from pylorax.api.recipes import tag_recipe_commit, recipe_diff @@ -1364,6 +1377,21 @@ def v0_api(api): return jsonify(uuids=results) + @api.route("/api/v0/compose/cancel/", methods=["DELETE"]) + @crossdomain(origin="*") + def v0_compose_cancel(uuid): + """Cancel a running compose and delete 
its results directory""" + status = uuid_status(api.config["COMPOSER_CFG"], uuid) + if status["queue_status"] not in ["WAITING", "RUNNING"]: + return jsonify({"status": False, "uuid": uuid, "msg": "Cannot cancel a build that is in the %s state" % status["queue_status"]}) + + try: + uuid_cancel(api.config["COMPOSER_CFG"], uuid) + except Exception as e: + return jsonify({"status": False, "uuid": uuid, "msg": str(e)}) + else: + return jsonify({"status": True, "uuid": uuid}) + @api.route("/api/v0/compose/delete/", methods=["DELETE"]) @crossdomain(origin="*") def v0_compose_delete(uuids):