From 96cb2f3776d453c50e55185a50c980ff210b1719 Mon Sep 17 00:00:00 2001
From: Jonathan Lebon <jonathan@jlebon.com>
Date: Fri, 31 Jul 2020 09:24:33 -0400
Subject: [PATCH] cloudstack|openstack: propagate ErrNeedNet

On CloudStack/OpenStack, we fetch from three different sources
simultaneously: two config drives, and the metadata service.
Error handling for these goroutines was causing `ErrNeedNet` from the
metadata service to be ignored, so we weren't correctly propagating it
back to the caller (which keys off of it to signal to the OS that
networking is needed).

As a simple hack, record whether any fetcher hit `ErrNeedNet`; if none of
the fetchers succeeds before the timeout, return that error instead. (The
better fix, of course, is to not guess the metadata source in parallel
like this, but that's a much bigger issue.)

Fixes: #956
Fixes: #1056
---
 internal/providers/cloudstack/cloudstack.go | 10 ++++++++++
 internal/providers/openstack/openstack.go   | 10 ++++++++++
 2 files changed, 20 insertions(+)

diff --git a/internal/providers/cloudstack/cloudstack.go b/internal/providers/cloudstack/cloudstack.go
index d4cc440c..83ed3700 100644
--- a/internal/providers/cloudstack/cloudstack.go
+++ b/internal/providers/cloudstack/cloudstack.go
@@ -50,6 +50,8 @@ func FetchConfig(f *resource.Fetcher) (types.Config, report.Report, error) {
 	var data []byte
 	ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
 
+	sawErrNeedNet := false
+
 	dispatch := func(name string, fn func() ([]byte, error)) {
 		raw, err := fn()
 		if err != nil {
@@ -57,6 +59,9 @@ func FetchConfig(f *resource.Fetcher) (types.Config, report.Report, error) {
 			case context.Canceled:
 			case context.DeadlineExceeded:
 				f.Logger.Err("timed out while fetching config from %s", name)
+			case resource.ErrNeedNet:
+				sawErrNeedNet = true
+				fallthrough
 			default:
 				f.Logger.Err("failed to fetch config from %s: %v", name, err)
 			}
@@ -81,6 +86,11 @@ func FetchConfig(f *resource.Fetcher) (types.Config, report.Report, error) {
 
 	<-ctx.Done()
 	if ctx.Err() == context.DeadlineExceeded {
+		// Did we hit neednet? If so, propagate that up instead. The OS should
+		// retry fetching again once networking is up.
+		if sawErrNeedNet {
+			return types.Config{}, report.Report{}, resource.ErrNeedNet
+		}
 		f.Logger.Info("neither config drive nor metadata service were available in time. Continuing without a config...")
 	}
 
diff --git a/internal/providers/openstack/openstack.go b/internal/providers/openstack/openstack.go
index d511bda2..41699515 100644
--- a/internal/providers/openstack/openstack.go
+++ b/internal/providers/openstack/openstack.go
@@ -55,6 +55,8 @@ func FetchConfig(f *resource.Fetcher) (types.Config, report.Report, error) {
 	var data []byte
 	ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
 
+	sawErrNeedNet := false
+
 	dispatch := func(name string, fn func() ([]byte, error)) {
 		raw, err := fn()
 		if err != nil {
@@ -62,6 +64,9 @@ func FetchConfig(f *resource.Fetcher) (types.Config, report.Report, error) {
 			case context.Canceled:
 			case context.DeadlineExceeded:
 				f.Logger.Err("timed out while fetching config from %s", name)
+			case resource.ErrNeedNet:
+				sawErrNeedNet = true
+				fallthrough
 			default:
 				f.Logger.Err("failed to fetch config from %s: %v", name, err)
 			}
@@ -86,6 +91,11 @@ func FetchConfig(f *resource.Fetcher) (types.Config, report.Report, error) {
 
 	<-ctx.Done()
 	if ctx.Err() == context.DeadlineExceeded {
+		// Did we hit neednet? If so, propagate that up instead. The OS should
+		// retry fetching again once networking is up.
+		if sawErrNeedNet {
+			return types.Config{}, report.Report{}, resource.ErrNeedNet
+		}
 		f.Logger.Info("neither config drive nor metadata service were available in time. Continuing without a config...")
 	}
 
-- 
2.26.2