Add /compose/cancel API to cancel a running build

If the build hasn't started yet (state is WAITING) try removing the
symlink to it. If this succeeds, delete the partial results directory.

If the build has already reached RUNNING, the cancel call writes a CANCEL
file in the results directory. The callback that is passed to
execWithRedirect detects this file, causing a SIGTERM to be sent to
anaconda. Anaconda then exits and cleanup happens normally. The partial
results directory is then removed.
This commit is contained in:
Brian C. Lane 2018-02-07 16:33:33 -08:00
parent 8f0bca00c0
commit 072aa720ff
2 changed files with 99 additions and 9 deletions

View File

@ -71,7 +71,12 @@ def monitor(cfg, cancel_q):
else:
src = joinpaths(cfg.composer_dir, "queue/new", jobs[0])
dst = joinpaths(cfg.composer_dir, "queue/run", jobs[0])
os.rename(src, dst)
try:
os.rename(src, dst)
except OSError:
# The symlink may vanish if uuid_cancel() has been called
continue
log.info("Starting new compose: %s", dst)
open(joinpaths(dst, "STATUS"), "w").write("RUNNING\n")
@ -114,14 +119,20 @@ def make_compose(cfg, results_dir):
for f in ["/tmp/NOSAVE_INPUT_KS", "/tmp/NOSAVE_LOGS"]:
open(f, "w")
log.debug("repo_url = %s, cfg = %s", repo_url, install_cfg)
novirt_install(install_cfg, joinpaths(results_dir, install_cfg.image_name), None, repo_url)
# Placing a CANCEL file in the results directory will make execWithRedirect send anaconda a SIGTERM
def cancel_build():
    """Report whether a CANCEL file is present in the results directory.

    :returns: True if ``<results_dir>/CANCEL`` exists
    :rtype: bool
    """
    cancel_marker = joinpaths(results_dir, "CANCEL")
    return os.path.exists(cancel_marker)
# Make sure that everything under the results directory is owned by the user
user = pwd.getpwuid(cfg.uid).pw_name
group = grp.getgrgid(cfg.gid).gr_name
log.debug("Install finished, chowning results to %s:%s", user, group)
subprocess.call(["chown", "-R", "%s:%s" % (user, group), results_dir])
log.debug("repo_url = %s, cfg = %s", repo_url, install_cfg)
try:
novirt_install(install_cfg, joinpaths(results_dir, install_cfg.image_name), None, repo_url,
callback_func=cancel_build)
finally:
# Make sure that everything under the results directory is owned by the user
user = pwd.getpwuid(cfg.uid).pw_name
group = grp.getgrgid(cfg.gid).gr_name
log.debug("Install finished, chowning results to %s:%s", user, group)
subprocess.call(["chown", "-R", "%s:%s" % (user, group), results_dir])
def get_compose_type(results_dir):
""" Return the type of composition.
@ -212,6 +223,57 @@ def build_status(cfg, status_filter=None):
results.append(compose_detail(build))
return results
def uuid_cancel(cfg, uuid):
    """Cancel a build and delete its results

    :param cfg: Configuration settings
    :type cfg: ComposerConfig
    :param uuid: The UUID of the build
    :type uuid: str
    :returns: The result of uuid_delete() for the build (True if it was canceled and deleted)
    :rtype: bool

    Only call this if the build status is WAITING or RUNNING

    A WAITING build is canceled by removing its symlink from queue/new. A build
    that is already running is canceled by writing a CANCEL file into its results
    directory and then waiting, for up to 10 minutes, for the build to stop.
    """
    # This status can change (and probably will) while it is in the middle of doing this:
    # It can move from WAITING -> RUNNING or it can move from RUNNING -> FINISHED|FAILED

    # If it is in WAITING remove the symlink and then check to make sure it didn't show up
    # in RUNNING
    lib_dir = cfg.get("composer", "lib_dir")
    queue_dir = joinpaths(lib_dir, "queue")
    uuid_new = joinpaths(queue_dir, "new", uuid)
    if os.path.exists(uuid_new):
        try:
            os.unlink(uuid_new)
        except OSError:
            # The symlink may vanish if the queue monitor started the build
            pass
        uuid_run = joinpaths(queue_dir, "run", uuid)
        if not os.path.exists(uuid_run):
            # Successfully removed it before the build started
            return uuid_delete(cfg, uuid)

    # Tell the build to stop running. Use a context manager so the file is
    # flushed and closed before the build's callback looks for it.
    cancel_path = joinpaths(lib_dir, "results", uuid, "CANCEL")
    with open(cancel_path, "w") as cancel_file:
        cancel_file.write("\n")

    # Wait for the build to stop. A canceled build ends up FAILED, but it may
    # also reach FINISHED if it completed before the CANCEL file was noticed;
    # in that case there is nothing left to wait for.
    started = time.time()
    while True:
        status = uuid_status(cfg, uuid)
        if status["queue_status"] in ("FAILED", "FINISHED"):
            break

        # Is this taking too long? Exit anyway and try to cleanup.
        if time.time() > started + (10 * 60):
            log.error("Failed to cancel the build of %s", uuid)
            break

        time.sleep(5)

    # Remove the partial results and report the outcome, matching the WAITING path
    return uuid_delete(cfg, uuid)
def uuid_delete(cfg, uuid):
"""Delete all of the results from a compose

View File

@ -719,6 +719,19 @@ POST `/api/v0/recipes/tag/<recipe_name>`
]
}
DELETE `/api/v0/compose/cancel/<uuid>`
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Cancel the build, if it is not finished, and delete the results. It will return a
status of True if it is successful.
Example::
{
"status": true,
"uuid": "03397f8d-acff-4cdb-bd31-f629b7a948f5"
}
DELETE `/api/v0/compose/delete/<uuids>`
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
@ -832,7 +845,7 @@ from pylorax.api.crossdomain import crossdomain
from pylorax.api.projects import projects_list, projects_info, projects_depsolve
from pylorax.api.projects import modules_list, modules_info, ProjectsError
from pylorax.api.queue import queue_status, build_status, uuid_delete, uuid_status, uuid_info
from pylorax.api.queue import uuid_tar, uuid_image
from pylorax.api.queue import uuid_tar, uuid_image, uuid_cancel
from pylorax.api.recipes import list_branch_files, read_recipe_commit, recipe_filename, list_commits
from pylorax.api.recipes import recipe_from_dict, recipe_from_toml, commit_recipe, delete_recipe, revert_recipe
from pylorax.api.recipes import tag_recipe_commit, recipe_diff
@ -1364,6 +1377,21 @@ def v0_api(api):
return jsonify(uuids=results)
@api.route("/api/v0/compose/cancel/<uuid>", methods=["DELETE"])
@crossdomain(origin="*")
def v0_compose_cancel(uuid):
    """Cancel a WAITING or RUNNING compose and delete its results directory

    Responds with a JSON object holding ``status`` and ``uuid``; when the
    cancel is refused or raises, ``status`` is false and ``msg`` gives the reason.
    """
    composer_cfg = api.config["COMPOSER_CFG"]
    queue_state = uuid_status(composer_cfg, uuid)["queue_status"]

    # Only builds that have not completed can be canceled
    if queue_state not in ("WAITING", "RUNNING"):
        reason = "Cannot cancel a build that is in the %s state" % queue_state
        return jsonify({"status": False, "uuid": uuid, "msg": reason})

    try:
        uuid_cancel(composer_cfg, uuid)
    except Exception as err:
        # Report any failure to the client instead of raising out of the handler
        return jsonify({"status": False, "uuid": uuid, "msg": str(err)})

    return jsonify({"status": True, "uuid": uuid})
@api.route("/api/v0/compose/delete/<uuids>", methods=["DELETE"])
@crossdomain(origin="*")
def v0_compose_delete(uuids):