forked from rpms/cloud-init
297 lines
12 KiB
Diff
297 lines
12 KiB
Diff
From 2e070086275341dfceb6d5b1e12f06f22e7bbfcd Mon Sep 17 00:00:00 2001
|
|
From: Eduardo Otubo <otubo@redhat.com>
|
|
Date: Wed, 23 Jan 2019 12:30:21 +0100
|
|
Subject: net: Wait for dhclient to daemonize before reading lease file
|
|
|
|
RH-Author: Eduardo Otubo <otubo@redhat.com>
|
|
Message-id: <20190123123021.32708-1-otubo@redhat.com>
|
|
Patchwork-id: 84095
|
|
O-Subject: [RHEL-7.7 cloud-init PATCH] net: Wait for dhclient to daemonize before reading lease file
|
|
Bugzilla: 1632967
|
|
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
|
|
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
|
|
|
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1632967
|
|
Brew: https://bugzilla.redhat.com/show_bug.cgi?id=1632967
|
|
Tested: Me and upstream
|
|
|
|
commit fdadcb5fae51f4e6799314ab98e3aec56c79b17c
|
|
Author: Jason Zions <jasonzio@microsoft.com>
|
|
Date: Tue Jan 15 21:37:17 2019 +0000
|
|
|
|
net: Wait for dhclient to daemonize before reading lease file
|
|
|
|
cloud-init uses dhclient to fetch the DHCP lease so it can extract
|
|
DHCP options. dhclient creates the leasefile, then writes to it;
|
|
simply waiting for the leasefile to appear creates a race between
|
|
dhclient and cloud-init. Instead, wait for dhclient to be parented by
|
|
init. At that point, we know it has written to the leasefile, so it's
|
|
safe to copy the file and kill the process.
|
|
|
|
cloud-init creates a temporary directory in which to execute dhclient,
|
|
and deletes that directory after it has killed the process. If
|
|
cloud-init abandons waiting for dhclient to daemonize, it will still
|
|
attempt to delete the temporary directory, but will not report an
|
|
exception should that attempt fail.
|
|
|
|
LP: #1794399
|
|
|
|
Signed-off-by: Eduardo Otubo <otubo@redhat.com>
|
|
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
|
|
Signed-off-by: Danilo C. L. de Paula <ddepaula@redhat.com>
|
|
---
|
|
cloudinit/net/dhcp.py | 44 +++++++++++++++++++++---------
|
|
cloudinit/net/tests/test_dhcp.py | 15 ++++++++--
|
|
cloudinit/temp_utils.py | 4 +--
|
|
cloudinit/tests/test_temp_utils.py | 18 +++++++++++-
|
|
cloudinit/util.py | 16 ++++++++++-
|
|
tests/unittests/test_util.py | 6 ++++
|
|
6 files changed, 83 insertions(+), 20 deletions(-)
|
|
|
|
diff --git a/cloudinit/net/dhcp.py b/cloudinit/net/dhcp.py
|
|
index 0db991db..c98a97cd 100644
|
|
--- a/cloudinit/net/dhcp.py
|
|
+++ b/cloudinit/net/dhcp.py
|
|
@@ -9,6 +9,7 @@ import logging
|
|
import os
|
|
import re
|
|
import signal
|
|
+import time
|
|
|
|
from cloudinit.net import (
|
|
EphemeralIPv4Network, find_fallback_nic, get_devicelist,
|
|
@@ -127,7 +128,9 @@ def maybe_perform_dhcp_discovery(nic=None):
|
|
if not dhclient_path:
|
|
LOG.debug('Skip dhclient configuration: No dhclient command found.')
|
|
return []
|
|
- with temp_utils.tempdir(prefix='cloud-init-dhcp-', needs_exe=True) as tdir:
|
|
+ with temp_utils.tempdir(rmtree_ignore_errors=True,
|
|
+ prefix='cloud-init-dhcp-',
|
|
+ needs_exe=True) as tdir:
|
|
# Use /var/tmp because /run/cloud-init/tmp is mounted noexec
|
|
return dhcp_discovery(dhclient_path, nic, tdir)
|
|
|
|
@@ -195,24 +198,39 @@ def dhcp_discovery(dhclient_cmd_path, interface, cleandir):
|
|
'-pf', pid_file, interface, '-sf', '/bin/true']
|
|
util.subp(cmd, capture=True)
|
|
|
|
- # dhclient doesn't write a pid file until after it forks when it gets a
|
|
- # proper lease response. Since cleandir is a temp directory that gets
|
|
- # removed, we need to wait for that pidfile creation before the
|
|
- # cleandir is removed, otherwise we get FileNotFound errors.
|
|
+ # Wait for pid file and lease file to appear, and for the process
|
|
+ # named by the pid file to daemonize (have pid 1 as its parent). If we
|
|
+ # try to read the lease file before daemonization happens, we might try
|
|
+ # to read it before the dhclient has actually written it. We also have
|
|
+ # to wait until the dhclient has become a daemon so we can be sure to
|
|
+ # kill the correct process, thus freeing cleandir to be deleted back
|
|
+ # up the callstack.
|
|
missing = util.wait_for_files(
|
|
[pid_file, lease_file], maxwait=5, naplen=0.01)
|
|
if missing:
|
|
LOG.warning("dhclient did not produce expected files: %s",
|
|
', '.join(os.path.basename(f) for f in missing))
|
|
return []
|
|
- pid_content = util.load_file(pid_file).strip()
|
|
- try:
|
|
- pid = int(pid_content)
|
|
- except ValueError:
|
|
- LOG.debug(
|
|
- "pid file contains non-integer content '%s'", pid_content)
|
|
- else:
|
|
- os.kill(pid, signal.SIGKILL)
|
|
+
|
|
+ ppid = 'unknown'
|
|
+ for _ in range(0, 1000):
|
|
+ pid_content = util.load_file(pid_file).strip()
|
|
+ try:
|
|
+ pid = int(pid_content)
|
|
+ except ValueError:
|
|
+ pass
|
|
+ else:
|
|
+ ppid = util.get_proc_ppid(pid)
|
|
+ if ppid == 1:
|
|
+ LOG.debug('killing dhclient with pid=%s', pid)
|
|
+ os.kill(pid, signal.SIGKILL)
|
|
+ return parse_dhcp_lease_file(lease_file)
|
|
+ time.sleep(0.01)
|
|
+
|
|
+ LOG.error(
|
|
+ 'dhclient(pid=%s, parentpid=%s) failed to daemonize after %s seconds',
|
|
+ pid_content, ppid, 0.01 * 1000
|
|
+ )
|
|
return parse_dhcp_lease_file(lease_file)
|
|
|
|
|
|
diff --git a/cloudinit/net/tests/test_dhcp.py b/cloudinit/net/tests/test_dhcp.py
|
|
index cd3e7328..79e8842f 100644
|
|
--- a/cloudinit/net/tests/test_dhcp.py
|
|
+++ b/cloudinit/net/tests/test_dhcp.py
|
|
@@ -145,16 +145,20 @@ class TestDHCPDiscoveryClean(CiTestCase):
|
|
'subnet-mask': '255.255.255.0', 'routers': '192.168.2.1'}],
|
|
dhcp_discovery(dhclient_script, 'eth9', tmpdir))
|
|
self.assertIn(
|
|
- "pid file contains non-integer content ''", self.logs.getvalue())
|
|
+ "dhclient(pid=, parentpid=unknown) failed "
|
|
+ "to daemonize after 10.0 seconds",
|
|
+ self.logs.getvalue())
|
|
m_kill.assert_not_called()
|
|
|
|
+ @mock.patch('cloudinit.net.dhcp.util.get_proc_ppid')
|
|
@mock.patch('cloudinit.net.dhcp.os.kill')
|
|
@mock.patch('cloudinit.net.dhcp.util.wait_for_files')
|
|
@mock.patch('cloudinit.net.dhcp.util.subp')
|
|
def test_dhcp_discovery_run_in_sandbox_waits_on_lease_and_pid(self,
|
|
m_subp,
|
|
m_wait,
|
|
- m_kill):
|
|
+ m_kill,
|
|
+ m_getppid):
|
|
"""dhcp_discovery waits for the presence of pidfile and dhcp.leases."""
|
|
tmpdir = self.tmp_dir()
|
|
dhclient_script = os.path.join(tmpdir, 'dhclient.orig')
|
|
@@ -164,6 +168,7 @@ class TestDHCPDiscoveryClean(CiTestCase):
|
|
pidfile = self.tmp_path('dhclient.pid', tmpdir)
|
|
leasefile = self.tmp_path('dhcp.leases', tmpdir)
|
|
m_wait.return_value = [pidfile] # Return the missing pidfile wait for
|
|
+ m_getppid.return_value = 1 # Indicate that dhclient has daemonized
|
|
self.assertEqual([], dhcp_discovery(dhclient_script, 'eth9', tmpdir))
|
|
self.assertEqual(
|
|
mock.call([pidfile, leasefile], maxwait=5, naplen=0.01),
|
|
@@ -173,9 +178,10 @@ class TestDHCPDiscoveryClean(CiTestCase):
|
|
self.logs.getvalue())
|
|
m_kill.assert_not_called()
|
|
|
|
+ @mock.patch('cloudinit.net.dhcp.util.get_proc_ppid')
|
|
@mock.patch('cloudinit.net.dhcp.os.kill')
|
|
@mock.patch('cloudinit.net.dhcp.util.subp')
|
|
- def test_dhcp_discovery_run_in_sandbox(self, m_subp, m_kill):
|
|
+ def test_dhcp_discovery_run_in_sandbox(self, m_subp, m_kill, m_getppid):
|
|
"""dhcp_discovery brings up the interface and runs dhclient.
|
|
|
|
It also returns the parsed dhcp.leases file generated in the sandbox.
|
|
@@ -197,6 +203,7 @@ class TestDHCPDiscoveryClean(CiTestCase):
|
|
pid_file = os.path.join(tmpdir, 'dhclient.pid')
|
|
my_pid = 1
|
|
write_file(pid_file, "%d\n" % my_pid)
|
|
+ m_getppid.return_value = 1 # Indicate that dhclient has daemonized
|
|
|
|
self.assertItemsEqual(
|
|
[{'interface': 'eth9', 'fixed-address': '192.168.2.74',
|
|
@@ -355,3 +362,5 @@ class TestEphemeralDhcpNoNetworkSetup(HttprettyTestCase):
|
|
self.assertEqual(fake_lease, lease)
|
|
# Ensure that dhcp discovery occurs
|
|
m_dhcp.called_once_with()
|
|
+
|
|
+# vi: ts=4 expandtab
|
|
diff --git a/cloudinit/temp_utils.py b/cloudinit/temp_utils.py
|
|
index c98a1b53..346276ec 100644
|
|
--- a/cloudinit/temp_utils.py
|
|
+++ b/cloudinit/temp_utils.py
|
|
@@ -81,7 +81,7 @@ def ExtendedTemporaryFile(**kwargs):
|
|
|
|
|
|
@contextlib.contextmanager
|
|
-def tempdir(**kwargs):
|
|
+def tempdir(rmtree_ignore_errors=False, **kwargs):
|
|
# This seems like it was only added in python 3.2
|
|
# Make it since its useful...
|
|
# See: http://bugs.python.org/file12970/tempdir.patch
|
|
@@ -89,7 +89,7 @@ def tempdir(**kwargs):
|
|
try:
|
|
yield tdir
|
|
finally:
|
|
- shutil.rmtree(tdir)
|
|
+ shutil.rmtree(tdir, ignore_errors=rmtree_ignore_errors)
|
|
|
|
|
|
def mkdtemp(**kwargs):
|
|
diff --git a/cloudinit/tests/test_temp_utils.py b/cloudinit/tests/test_temp_utils.py
|
|
index ffbb92cd..4a52ef89 100644
|
|
--- a/cloudinit/tests/test_temp_utils.py
|
|
+++ b/cloudinit/tests/test_temp_utils.py
|
|
@@ -2,8 +2,9 @@
|
|
|
|
"""Tests for cloudinit.temp_utils"""
|
|
|
|
-from cloudinit.temp_utils import mkdtemp, mkstemp
|
|
+from cloudinit.temp_utils import mkdtemp, mkstemp, tempdir
|
|
from cloudinit.tests.helpers import CiTestCase, wrap_and_call
|
|
+import os
|
|
|
|
|
|
class TestTempUtils(CiTestCase):
|
|
@@ -98,4 +99,19 @@ class TestTempUtils(CiTestCase):
|
|
self.assertEqual('/fake/return/path', retval)
|
|
self.assertEqual([{'dir': '/run/cloud-init/tmp'}], calls)
|
|
|
|
+ def test_tempdir_error_suppression(self):
|
|
+ """test tempdir suppresses errors during directory removal."""
|
|
+
|
|
+ with self.assertRaises(OSError):
|
|
+ with tempdir(prefix='cloud-init-dhcp-') as tdir:
|
|
+ os.rmdir(tdir)
|
|
+ # As a result, the directory is already gone,
|
|
+ # so shutil.rmtree should raise OSError
|
|
+
|
|
+ with tempdir(rmtree_ignore_errors=True,
|
|
+ prefix='cloud-init-dhcp-') as tdir:
|
|
+ os.rmdir(tdir)
|
|
+ # Since the directory is already gone, shutil.rmtree would raise
|
|
+ # OSError, but we suppress that
|
|
+
|
|
# vi: ts=4 expandtab
|
|
diff --git a/cloudinit/util.py b/cloudinit/util.py
|
|
index 7800f7bc..a84112a9 100644
|
|
--- a/cloudinit/util.py
|
|
+++ b/cloudinit/util.py
|
|
@@ -2861,7 +2861,6 @@ def mount_is_read_write(mount_point):
|
|
mount_opts = result[-1].split(',')
|
|
return mount_opts[0] == 'rw'
|
|
|
|
-
|
|
def udevadm_settle(exists=None, timeout=None):
|
|
"""Invoke udevadm settle with optional exists and timeout parameters"""
|
|
settle_cmd = ["udevadm", "settle"]
|
|
@@ -2875,5 +2874,20 @@ def udevadm_settle(exists=None, timeout=None):
|
|
|
|
return subp(settle_cmd)
|
|
|
|
+def get_proc_ppid(pid):
|
|
+ """
|
|
+ Return the parent pid of a process.
|
|
+ """
|
|
+ ppid = 0
|
|
+ try:
|
|
+ contents = load_file("/proc/%s/stat" % pid, quiet=True)
|
|
+ except IOError as e:
|
|
+ LOG.warning('Failed to load /proc/%s/stat. %s', pid, e)
|
|
+ if contents:
|
|
+ parts = contents.split(" ", 4)
|
|
+ # man proc says
|
|
+ # ppid %d (4) The PID of the parent.
|
|
+ ppid = int(parts[3])
|
|
+ return ppid
|
|
|
|
# vi: ts=4 expandtab
|
|
diff --git a/tests/unittests/test_util.py b/tests/unittests/test_util.py
|
|
index 5a14479a..8aebcd62 100644
|
|
--- a/tests/unittests/test_util.py
|
|
+++ b/tests/unittests/test_util.py
|
|
@@ -1114,6 +1114,12 @@ class TestLoadShellContent(helpers.TestCase):
|
|
'key3="val3 #tricky"',
|
|
''])))
|
|
|
|
+ def test_get_proc_ppid(self):
|
|
+ """get_proc_ppid returns correct parent pid value."""
|
|
+ my_pid = os.getpid()
|
|
+ my_ppid = os.getppid()
|
|
+ self.assertEqual(my_ppid, util.get_proc_ppid(my_pid))
|
|
+
|
|
|
|
class TestGetProcEnv(helpers.TestCase):
|
|
"""test get_proc_env."""
|
|
--
|
|
2.20.1
|
|
|