From 82c427a71a8c9b542ac273b72077b3cb6ee8bed8 Mon Sep 17 00:00:00 2001
From: CentOS Sources <bugs@centos.org>
Date: Wed, 5 May 2021 22:16:22 +0000
Subject: [PATCH] import gcc-8.4.1-2.1.el8

# Conflicts:
#	SOURCES/gcc8-libgomp-20190503.patch
---
 .gcc.metadata                          |     2 +-
 .gitignore                             |     2 +-
 SOURCES/gcc8-libgomp-20190503.patch    | 10068 +++++++++++++++++++++++
 SOURCES/gcc8-pr95614-revert.patch      |    96 -
 SOURCES/gcc8-remove-old-demangle.patch |  8227 ++++++++++++++++++
 SPECS/gcc.spec                         |    25 +-
 6 files changed, 18315 insertions(+), 105 deletions(-)
 create mode 100644 SOURCES/gcc8-libgomp-20190503.patch
 delete mode 100644 SOURCES/gcc8-pr95614-revert.patch
 create mode 100644 SOURCES/gcc8-remove-old-demangle.patch

diff --git a/.gcc.metadata b/.gcc.metadata
index 2647001..6bd88fb 100644
--- a/.gcc.metadata
+++ b/.gcc.metadata
@@ -1,3 +1,3 @@
-6d0269f5a6a1907dfb48cb6d8b64538bde7f1f6d SOURCES/gcc-8.4.1-20200928.tar.xz
+40d1c8cf65658f57d999d23ba890ff40b7cd1cfe SOURCES/gcc-8.4.1-20210423.tar.xz
 3bdb3cc01fa7690a0e20ea5cfffcbe690f7665eb SOURCES/nvptx-newlib-aadc8eb0ec43b7cd0dd2dfb484bae63c8b05ef24.tar.xz
 ce8eb83be0ac37fb5d5388df455a980fe37b4f13 SOURCES/nvptx-tools-c28050f60193b3b95a18866a96f03334e874e78f.tar.xz
diff --git a/.gitignore b/.gitignore
index 526d1a0..f3d61d7 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,3 @@
-SOURCES/gcc-8.4.1-20200928.tar.xz
+SOURCES/gcc-8.4.1-20210423.tar.xz
 SOURCES/nvptx-newlib-aadc8eb0ec43b7cd0dd2dfb484bae63c8b05ef24.tar.xz
 SOURCES/nvptx-tools-c28050f60193b3b95a18866a96f03334e874e78f.tar.xz
diff --git a/SOURCES/gcc8-libgomp-20190503.patch b/SOURCES/gcc8-libgomp-20190503.patch
new file mode 100644
index 0000000..fcd226d
--- /dev/null
+++ b/SOURCES/gcc8-libgomp-20190503.patch
@@ -0,0 +1,10068 @@
+--- libgomp/loop.c.jj	2018-04-25 09:40:31.870655561 +0200
++++ libgomp/loop.c	2019-05-07 18:46:36.526109736 +0200
+@@ -27,9 +27,13 @@
+ 
+ #include <limits.h>
+ #include <stdlib.h>
++#include <string.h>
+ #include "libgomp.h"
+ 
+ 
++ialias (GOMP_loop_runtime_next)
++ialias_redirect (GOMP_taskgroup_reduction_register)
++
+ /* Initialize the given work share construct from the given arguments.  */
+ 
+ static inline void
+@@ -79,12 +83,12 @@ gomp_loop_init (struct gomp_work_share *
+ }
+ 
+ /* The *_start routines are called when first encountering a loop construct
+-   that is not bound directly to a parallel construct.  The first thread 
++   that is not bound directly to a parallel construct.  The first thread
+    that arrives will create the work-share construct; subsequent threads
+    will see the construct exists and allocate work from it.
+ 
+    START, END, INCR are the bounds of the loop; due to the restrictions of
+-   OpenMP, these values must be the same in every thread.  This is not 
++   OpenMP, these values must be the same in every thread.  This is not
+    verified (nor is it entirely verifiable, since START is not necessarily
+    retained intact in the work-share data structure).  CHUNK_SIZE is the
+    scheduling parameter; again this must be identical in all threads.
+@@ -101,7 +105,7 @@ gomp_loop_static_start (long start, long
+   struct gomp_thread *thr = gomp_thread ();
+ 
+   thr->ts.static_trip = 0;
+-  if (gomp_work_share_start (false))
++  if (gomp_work_share_start (0))
+     {
+       gomp_loop_init (thr->ts.work_share, start, end, incr,
+ 		      GFS_STATIC, chunk_size);
+@@ -123,7 +127,7 @@ gomp_loop_dynamic_start (long start, lon
+   struct gomp_thread *thr = gomp_thread ();
+   bool ret;
+ 
+-  if (gomp_work_share_start (false))
++  if (gomp_work_share_start (0))
+     {
+       gomp_loop_init (thr->ts.work_share, start, end, incr,
+ 		      GFS_DYNAMIC, chunk_size);
+@@ -151,7 +155,7 @@ gomp_loop_guided_start (long start, long
+   struct gomp_thread *thr = gomp_thread ();
+   bool ret;
+ 
+-  if (gomp_work_share_start (false))
++  if (gomp_work_share_start (0))
+     {
+       gomp_loop_init (thr->ts.work_share, start, end, incr,
+ 		      GFS_GUIDED, chunk_size);
+@@ -174,7 +178,7 @@ GOMP_loop_runtime_start (long start, lon
+ 			 long *istart, long *iend)
+ {
+   struct gomp_task_icv *icv = gomp_icv (false);
+-  switch (icv->run_sched_var)
++  switch (icv->run_sched_var & ~GFS_MONOTONIC)
+     {
+     case GFS_STATIC:
+       return gomp_loop_static_start (start, end, incr,
+@@ -197,6 +201,100 @@ GOMP_loop_runtime_start (long start, lon
+     }
+ }
+ 
++static long /* Reduce SCHED to GFS_STATIC, GFS_DYNAMIC or GFS_GUIDED.  */
++gomp_adjust_sched (long sched, long *chunk_size)
++{
++  sched &= ~GFS_MONOTONIC; /* Dispatch on the kind without GFS_MONOTONIC.  */
++  switch (sched)
++    {
++    case GFS_STATIC:
++    case GFS_DYNAMIC:
++    case GFS_GUIDED:
++      return sched; /* Explicit kind: *CHUNK_SIZE is left as passed in.  */
++    /* GFS_RUNTIME is used for runtime schedule without monotonic
++       or nonmonotonic modifiers on the clause.
++       GFS_RUNTIME|GFS_MONOTONIC for runtime schedule with monotonic
++       modifier.  */
++    case GFS_RUNTIME:
++    /* GFS_AUTO is used for runtime schedule with nonmonotonic
++       modifier.  */
++    case GFS_AUTO:
++      {
++	struct gomp_task_icv *icv = gomp_icv (false);
++	sched = icv->run_sched_var & ~GFS_MONOTONIC; /* Kind now comes from the ICV.  */
++	switch (sched)
++	  {
++	  case GFS_STATIC:
++	  case GFS_DYNAMIC:
++	  case GFS_GUIDED:
++	    *chunk_size = icv->run_sched_chunk_size; /* Overwrite with the ICV chunk.  */
++	    break;
++	  case GFS_AUTO:
++	    sched = GFS_STATIC; /* ICV value GFS_AUTO is run as GFS_STATIC...  */
++	    *chunk_size = 0; /* ...with *CHUNK_SIZE reset to 0.  */
++	    break;
++	  default:
++	    abort (); /* Any other kind stored in the ICV is fatal.  */
++	  }
++	return sched;
++      }
++    default:
++      abort (); /* Any other kind passed by the caller is fatal.  */
++    }
++}
++
++bool /* Loop start taking SCHED, optional REDUCTIONS and optional *MEM.  */
++GOMP_loop_start (long start, long end, long incr, long sched,
++		 long chunk_size, long *istart, long *iend,
++		 uintptr_t *reductions, void **mem)
++{
++  struct gomp_thread *thr = gomp_thread ();
++
++  thr->ts.static_trip = 0;
++  if (reductions)
++    gomp_workshare_taskgroup_start ();
++  if (gomp_work_share_start (0)) /* True: this thread sets up the work share.  */
++    {
++      sched = gomp_adjust_sched (sched, &chunk_size); /* May rewrite CHUNK_SIZE.  */
++      gomp_loop_init (thr->ts.work_share, start, end, incr,
++		      sched, chunk_size);
++      if (reductions)
++	{
++	  GOMP_taskgroup_reduction_register (reductions);
++	  thr->task->taskgroup->workshare = true;
++	  thr->ts.work_share->task_reductions = reductions; /* Read in the else arm.  */
++	}
++      if (mem)
++	{
++	  uintptr_t size = (uintptr_t) *mem; /* On entry *MEM holds a byte count.  */
++	  if (size > (sizeof (struct gomp_work_share)
++		      - offsetof (struct gomp_work_share,
++				  inline_ordered_team_ids))) /* Exceeds struct tail.  */
++	    thr->ts.work_share->ordered_team_ids
++	      = gomp_malloc_cleared (size);
++	  else
++	    memset (thr->ts.work_share->ordered_team_ids, '\0', size);
++	  *mem = (void *) thr->ts.work_share->ordered_team_ids; /* Hand back buffer.  */
++	}
++      gomp_work_share_init_done ();
++    }
++  else
++    {
++      if (reductions)
++	{
++	  uintptr_t *first_reductions = thr->ts.work_share->task_reductions;
++	  gomp_workshare_task_reduction_register (reductions,
++						  first_reductions);
++	}
++      if (mem)
++	*mem = (void *) thr->ts.work_share->ordered_team_ids; /* Existing buffer.  */
++    }
++
++  if (!istart) /* Null ISTART: report success without fetching iterations.  */
++    return true;
++  return ialias_call (GOMP_loop_runtime_next) (istart, iend);
++}
++
+ /* The *_ordered_*_start routines are similar.  The only difference is that
+    this work-share construct is initialized to expect an ORDERED section.  */
+ 
+@@ -207,7 +305,7 @@ gomp_loop_ordered_static_start (long sta
+   struct gomp_thread *thr = gomp_thread ();
+ 
+   thr->ts.static_trip = 0;
+-  if (gomp_work_share_start (true))
++  if (gomp_work_share_start (1))
+     {
+       gomp_loop_init (thr->ts.work_share, start, end, incr,
+ 		      GFS_STATIC, chunk_size);
+@@ -225,7 +323,7 @@ gomp_loop_ordered_dynamic_start (long st
+   struct gomp_thread *thr = gomp_thread ();
+   bool ret;
+ 
+-  if (gomp_work_share_start (true))
++  if (gomp_work_share_start (1))
+     {
+       gomp_loop_init (thr->ts.work_share, start, end, incr,
+ 		      GFS_DYNAMIC, chunk_size);
+@@ -250,7 +348,7 @@ gomp_loop_ordered_guided_start (long sta
+   struct gomp_thread *thr = gomp_thread ();
+   bool ret;
+ 
+-  if (gomp_work_share_start (true))
++  if (gomp_work_share_start (1))
+     {
+       gomp_loop_init (thr->ts.work_share, start, end, incr,
+ 		      GFS_GUIDED, chunk_size);
+@@ -273,7 +371,7 @@ GOMP_loop_ordered_runtime_start (long st
+ 				 long *istart, long *iend)
+ {
+   struct gomp_task_icv *icv = gomp_icv (false);
+-  switch (icv->run_sched_var)
++  switch (icv->run_sched_var & ~GFS_MONOTONIC)
+     {
+     case GFS_STATIC:
+       return gomp_loop_ordered_static_start (start, end, incr,
+@@ -297,6 +395,81 @@ GOMP_loop_ordered_runtime_start (long st
+     }
+ }
+ 
++bool /* Ordered loop start taking SCHED, optional REDUCTIONS and *MEM.  */
++GOMP_loop_ordered_start (long start, long end, long incr, long sched,
++			 long chunk_size, long *istart, long *iend,
++			 uintptr_t *reductions, void **mem)
++{
++  struct gomp_thread *thr = gomp_thread ();
++  size_t ordered = 1;
++  bool ret;
++
++  thr->ts.static_trip = 0;
++  if (reductions)
++    gomp_workshare_taskgroup_start ();
++  if (mem)
++    ordered += (uintptr_t) *mem; /* Add the integer passed in through *MEM.  */
++  if (gomp_work_share_start (ordered)) /* True: this thread sets up the share.  */
++    {
++      sched = gomp_adjust_sched (sched, &chunk_size);
++      gomp_loop_init (thr->ts.work_share, start, end, incr,
++		      sched, chunk_size);
++      if (reductions)
++	{
++	  GOMP_taskgroup_reduction_register (reductions);
++	  thr->task->taskgroup->workshare = true;
++	  thr->ts.work_share->task_reductions = reductions;
++	}
++      if (sched == GFS_STATIC)
++	gomp_ordered_static_init ();
++      else
++	gomp_mutex_lock (&thr->ts.work_share->lock); /* Unlocked at the end.  */
++      gomp_work_share_init_done ();
++    }
++  else
++    {
++      if (reductions)
++	{
++	  uintptr_t *first_reductions = thr->ts.work_share->task_reductions;
++	  gomp_workshare_task_reduction_register (reductions,
++						  first_reductions);
++	}
++      sched = thr->ts.work_share->sched; /* Use the kind stored in the share.  */
++      if (sched != GFS_STATIC)
++	gomp_mutex_lock (&thr->ts.work_share->lock); /* Unlocked at the end.  */
++    }
++
++  if (mem)
++    {
++      uintptr_t p /* Address just past nthreads (or 1) ordered_team_ids entries.  */
++	= (uintptr_t) (thr->ts.work_share->ordered_team_ids
++		       + (thr->ts.team ? thr->ts.team->nthreads : 1));
++      p += __alignof__ (long long) - 1; /* Round up to long long alignment.  */
++      p &= ~(__alignof__ (long long) - 1);
++      *mem = (void *) p;
++    }
++
++  switch (sched)
++    {
++    case GFS_STATIC:
++    case GFS_AUTO:
++      return !gomp_iter_static_next (istart, iend); /* Returns before the unlock.  */
++    case GFS_DYNAMIC:
++      ret = gomp_iter_dynamic_next_locked (istart, iend);
++      break;
++    case GFS_GUIDED:
++      ret = gomp_iter_guided_next_locked (istart, iend);
++      break;
++    default:
++      abort ();
++    }
++
++  if (ret)
++    gomp_ordered_first ();
++  gomp_mutex_unlock (&thr->ts.work_share->lock); /* Pairs with the locks above.  */
++  return ret;
++}
++
+ /* The *_doacross_*_start routines are similar.  The only difference is that
+    this work-share construct is initialized to expect an ORDERED(N) - DOACROSS
+    section, and the worksharing loop iterates always from 0 to COUNTS[0] - 1
+@@ -310,11 +483,11 @@ gomp_loop_doacross_static_start (unsigne
+   struct gomp_thread *thr = gomp_thread ();
+ 
+   thr->ts.static_trip = 0;
+-  if (gomp_work_share_start (false))
++  if (gomp_work_share_start (0))
+     {
+       gomp_loop_init (thr->ts.work_share, 0, counts[0], 1,
+ 		      GFS_STATIC, chunk_size);
+-      gomp_doacross_init (ncounts, counts, chunk_size);
++      gomp_doacross_init (ncounts, counts, chunk_size, 0);
+       gomp_work_share_init_done ();
+     }
+ 
+@@ -328,11 +501,11 @@ gomp_loop_doacross_dynamic_start (unsign
+   struct gomp_thread *thr = gomp_thread ();
+   bool ret;
+ 
+-  if (gomp_work_share_start (false))
++  if (gomp_work_share_start (0))
+     {
+       gomp_loop_init (thr->ts.work_share, 0, counts[0], 1,
+ 		      GFS_DYNAMIC, chunk_size);
+-      gomp_doacross_init (ncounts, counts, chunk_size);
++      gomp_doacross_init (ncounts, counts, chunk_size, 0);
+       gomp_work_share_init_done ();
+     }
+ 
+@@ -354,11 +527,11 @@ gomp_loop_doacross_guided_start (unsigne
+   struct gomp_thread *thr = gomp_thread ();
+   bool ret;
+ 
+-  if (gomp_work_share_start (false))
++  if (gomp_work_share_start (0))
+     {
+       gomp_loop_init (thr->ts.work_share, 0, counts[0], 1,
+ 		      GFS_GUIDED, chunk_size);
+-      gomp_doacross_init (ncounts, counts, chunk_size);
++      gomp_doacross_init (ncounts, counts, chunk_size, 0);
+       gomp_work_share_init_done ();
+     }
+ 
+@@ -378,7 +551,7 @@ GOMP_loop_doacross_runtime_start (unsign
+ 				  long *istart, long *iend)
+ {
+   struct gomp_task_icv *icv = gomp_icv (false);
+-  switch (icv->run_sched_var)
++  switch (icv->run_sched_var & ~GFS_MONOTONIC)
+     {
+     case GFS_STATIC:
+       return gomp_loop_doacross_static_start (ncounts, counts,
+@@ -402,8 +575,52 @@ GOMP_loop_doacross_runtime_start (unsign
+     }
+ }
+ 
+-/* The *_next routines are called when the thread completes processing of 
+-   the iteration block currently assigned to it.  If the work-share 
++bool /* Doacross loop start taking SCHED, optional REDUCTIONS and *MEM.  */
++GOMP_loop_doacross_start (unsigned ncounts, long *counts, long sched,
++			  long chunk_size, long *istart, long *iend,
++			  uintptr_t *reductions, void **mem)
++{
++  struct gomp_thread *thr = gomp_thread ();
++
++  thr->ts.static_trip = 0;
++  if (reductions)
++    gomp_workshare_taskgroup_start ();
++  if (gomp_work_share_start (0)) /* True: this thread sets up the work share.  */
++    {
++      size_t extra = 0;
++      if (mem)
++	extra = (uintptr_t) *mem; /* Integer in *MEM goes to gomp_doacross_init.  */
++      sched = gomp_adjust_sched (sched, &chunk_size);
++      gomp_loop_init (thr->ts.work_share, 0, counts[0], 1,
++		      sched, chunk_size); /* Iterates 0 to COUNTS[0], step 1.  */
++      gomp_doacross_init (ncounts, counts, chunk_size, extra);
++      if (reductions)
++	{
++	  GOMP_taskgroup_reduction_register (reductions);
++	  thr->task->taskgroup->workshare = true;
++	  thr->ts.work_share->task_reductions = reductions;
++	}
++      gomp_work_share_init_done ();
++    }
++  else
++    {
++      if (reductions)
++	{
++	  uintptr_t *first_reductions = thr->ts.work_share->task_reductions;
++	  gomp_workshare_task_reduction_register (reductions,
++						  first_reductions);
++	}
++      sched = thr->ts.work_share->sched; /* NOTE(review): not read afterwards.  */
++    }
++
++  if (mem)
++    *mem = thr->ts.work_share->doacross->extra; /* Return the doacross extra.  */
++
++  return ialias_call (GOMP_loop_runtime_next) (istart, iend); /* No NULL check.  */
++}
++
++/* The *_next routines are called when the thread completes processing of
++   the iteration block currently assigned to it.  If the work-share
+    construct is bound directly to a parallel construct, then the iteration
+    bounds may have been set up before the parallel.  In which case, this
+    may be the first iteration for the thread.
+@@ -456,7 +673,7 @@ bool
+ GOMP_loop_runtime_next (long *istart, long *iend)
+ {
+   struct gomp_thread *thr = gomp_thread ();
+-  
++
+   switch (thr->ts.work_share->sched)
+     {
+     case GFS_STATIC:
+@@ -534,7 +751,7 @@ bool
+ GOMP_loop_ordered_runtime_next (long *istart, long *iend)
+ {
+   struct gomp_thread *thr = gomp_thread ();
+-  
++
+   switch (thr->ts.work_share->sched)
+     {
+     case GFS_STATIC:
+@@ -563,7 +780,7 @@ gomp_parallel_loop_start (void (*fn) (vo
+   num_threads = gomp_resolve_num_threads (num_threads, 0);
+   team = gomp_new_team (num_threads);
+   gomp_loop_init (&team->work_shares[0], start, end, incr, sched, chunk_size);
+-  gomp_team_start (fn, data, num_threads, flags, team);
++  gomp_team_start (fn, data, num_threads, flags, team, NULL);
+ }
+ 
+ void
+@@ -600,7 +817,8 @@ GOMP_parallel_loop_runtime_start (void (
+ {
+   struct gomp_task_icv *icv = gomp_icv (false);
+   gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
+-			    icv->run_sched_var, icv->run_sched_chunk_size, 0);
++			    icv->run_sched_var & ~GFS_MONOTONIC,
++			    icv->run_sched_chunk_size, 0);
+ }
+ 
+ ialias_redirect (GOMP_parallel_end)
+@@ -638,11 +856,28 @@ GOMP_parallel_loop_guided (void (*fn) (v
+   GOMP_parallel_end ();
+ }
+ 
++void /* Parallel loop whose schedule kind and chunk come from the ICV.  */
++GOMP_parallel_loop_runtime (void (*fn) (void *), void *data,
++			    unsigned num_threads, long start, long end,
++			    long incr, unsigned flags)
++{
++  struct gomp_task_icv *icv = gomp_icv (false);
++  gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
++			    icv->run_sched_var & ~GFS_MONOTONIC, /* Mask modifier.  */
++			    icv->run_sched_chunk_size, flags);
++  fn (data); /* The calling thread runs FN itself.  */
++  GOMP_parallel_end ();
++}
++
+ #ifdef HAVE_ATTRIBUTE_ALIAS
+ extern __typeof(GOMP_parallel_loop_dynamic) GOMP_parallel_loop_nonmonotonic_dynamic
+ 	__attribute__((alias ("GOMP_parallel_loop_dynamic")));
+ extern __typeof(GOMP_parallel_loop_guided) GOMP_parallel_loop_nonmonotonic_guided
+ 	__attribute__((alias ("GOMP_parallel_loop_guided")));
++extern __typeof(GOMP_parallel_loop_runtime) GOMP_parallel_loop_nonmonotonic_runtime
++	__attribute__((alias ("GOMP_parallel_loop_runtime")));
++extern __typeof(GOMP_parallel_loop_runtime) GOMP_parallel_loop_maybe_nonmonotonic_runtime
++	__attribute__((alias ("GOMP_parallel_loop_runtime")));
+ #else
+ void
+ GOMP_parallel_loop_nonmonotonic_dynamic (void (*fn) (void *), void *data,
+@@ -667,21 +902,35 @@ GOMP_parallel_loop_nonmonotonic_guided (
+   fn (data);
+   GOMP_parallel_end ();
+ }
+-#endif
+ 
+ void
+-GOMP_parallel_loop_runtime (void (*fn) (void *), void *data,
+-			    unsigned num_threads, long start, long end,
+-			    long incr, unsigned flags)
++GOMP_parallel_loop_nonmonotonic_runtime (void (*fn) (void *), void *data,
++					 unsigned num_threads, long start,
++					 long end, long incr, unsigned flags)
+ {
+   struct gomp_task_icv *icv = gomp_icv (false);
+   gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
+-			    icv->run_sched_var, icv->run_sched_chunk_size,
+-			    flags);
++			    icv->run_sched_var & ~GFS_MONOTONIC,
++			    icv->run_sched_chunk_size, flags);
+   fn (data);
+   GOMP_parallel_end ();
+ }
+ 
++void /* Real definition used when HAVE_ATTRIBUTE_ALIAS is not defined.  */
++GOMP_parallel_loop_maybe_nonmonotonic_runtime (void (*fn) (void *), void *data,
++					       unsigned num_threads, long start,
++					       long end, long incr,
++					       unsigned flags)
++{
++  struct gomp_task_icv *icv = gomp_icv (false);
++  gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
++			    icv->run_sched_var & ~GFS_MONOTONIC, /* Mask modifier.  */
++			    icv->run_sched_chunk_size, flags);
++  fn (data); /* The calling thread runs FN itself.  */
++  GOMP_parallel_end ();
++}
++#endif
++
+ /* The GOMP_loop_end* routines are called after the thread is told that
+    all loop iterations are complete.  The first two versions synchronize
+    all threads; the nowait version does not.  */
+@@ -721,6 +970,10 @@ extern __typeof(gomp_loop_dynamic_start)
+ 	__attribute__((alias ("gomp_loop_dynamic_start")));
+ extern __typeof(gomp_loop_guided_start) GOMP_loop_nonmonotonic_guided_start
+ 	__attribute__((alias ("gomp_loop_guided_start")));
++extern __typeof(GOMP_loop_runtime_start) GOMP_loop_nonmonotonic_runtime_start
++	__attribute__((alias ("GOMP_loop_runtime_start")));
++extern __typeof(GOMP_loop_runtime_start) GOMP_loop_maybe_nonmonotonic_runtime_start
++	__attribute__((alias ("GOMP_loop_runtime_start")));
+ 
+ extern __typeof(gomp_loop_ordered_static_start) GOMP_loop_ordered_static_start
+ 	__attribute__((alias ("gomp_loop_ordered_static_start")));
+@@ -746,6 +999,10 @@ extern __typeof(gomp_loop_dynamic_next)
+ 	__attribute__((alias ("gomp_loop_dynamic_next")));
+ extern __typeof(gomp_loop_guided_next) GOMP_loop_nonmonotonic_guided_next
+ 	__attribute__((alias ("gomp_loop_guided_next")));
++extern __typeof(GOMP_loop_runtime_next) GOMP_loop_nonmonotonic_runtime_next
++	__attribute__((alias ("GOMP_loop_runtime_next")));
++extern __typeof(GOMP_loop_runtime_next) GOMP_loop_maybe_nonmonotonic_runtime_next
++	__attribute__((alias ("GOMP_loop_runtime_next")));
+ 
+ extern __typeof(gomp_loop_ordered_static_next) GOMP_loop_ordered_static_next
+ 	__attribute__((alias ("gomp_loop_ordered_static_next")));
+@@ -791,6 +1048,20 @@ GOMP_loop_nonmonotonic_guided_start (lon
+ }
+ 
+ bool
++GOMP_loop_nonmonotonic_runtime_start (long start, long end, long incr,
++				      long *istart, long *iend)
++{
++  return GOMP_loop_runtime_start (start, end, incr, istart, iend);
++}
++
++bool
++GOMP_loop_maybe_nonmonotonic_runtime_start (long start, long end, long incr,
++					    long *istart, long *iend)
++{
++  return GOMP_loop_runtime_start (start, end, incr, istart, iend);
++}
++
++bool
+ GOMP_loop_ordered_static_start (long start, long end, long incr,
+ 				long chunk_size, long *istart, long *iend)
+ {
+@@ -869,6 +1140,18 @@ GOMP_loop_nonmonotonic_guided_next (long
+ }
+ 
+ bool
++GOMP_loop_nonmonotonic_runtime_next (long *istart, long *iend)
++{
++  return GOMP_loop_runtime_next (istart, iend);
++}
++
++bool
++GOMP_loop_maybe_nonmonotonic_runtime_next (long *istart, long *iend)
++{
++  return GOMP_loop_runtime_next (istart, iend);
++}
++
++bool
+ GOMP_loop_ordered_static_next (long *istart, long *iend)
+ {
+   return gomp_loop_ordered_static_next (istart, iend);
+--- libgomp/oacc-plugin.c.jj	2018-04-25 09:40:31.322655307 +0200
++++ libgomp/oacc-plugin.c	2019-05-07 18:46:36.531109656 +0200
+@@ -49,3 +49,14 @@ GOMP_PLUGIN_acc_thread (void)
+   struct goacc_thread *thr = goacc_thread ();
+   return thr ? thr->target_tls : NULL;
+ }
++
++int /* Return goacc_default_dims[I]; an I of GOMP_DIM_MAX or more is fatal.  */
++GOMP_PLUGIN_acc_default_dim (unsigned int i)
++{
++  if (i >= GOMP_DIM_MAX)
++    {
++      gomp_fatal ("invalid dimension argument: %u", i); /* I is unsigned.  */
++      return -1; /* Only reached if gomp_fatal were to return.  */
++    }
++  return goacc_default_dims[i];
++}
+--- libgomp/libgomp_g.h.jj	2018-04-25 09:40:31.320655306 +0200
++++ libgomp/libgomp_g.h	2019-05-07 18:46:36.513109943 +0200
+@@ -1,4 +1,4 @@
+-/* Copyright (C) 2005-2018 Free Software Foundation, Inc.
++/* Copyright (C) 2005-2019 Free Software Foundation, Inc.
+    Contributed by Richard Henderson <rth@redhat.com>.
+ 
+    This file is part of the GNU Offloading and Multi Processing Library
+@@ -31,6 +31,7 @@
+ 
+ #include <stdbool.h>
+ #include <stddef.h>
++#include "gstdint.h"
+ 
+ /* barrier.c */
+ 
+@@ -56,6 +57,12 @@ extern bool GOMP_loop_nonmonotonic_dynam
+ 						  long *, long *);
+ extern bool GOMP_loop_nonmonotonic_guided_start (long, long, long, long,
+ 						 long *, long *);
++extern bool GOMP_loop_nonmonotonic_runtime_start (long, long, long,
++						  long *, long *);
++extern bool GOMP_loop_maybe_nonmonotonic_runtime_start (long, long, long,
++							long *, long *);
++extern bool GOMP_loop_start (long, long, long, long, long, long *, long *,
++			     uintptr_t *, void **);
+ 
+ extern bool GOMP_loop_ordered_static_start (long, long, long, long,
+ 					    long *, long *);
+@@ -64,6 +71,8 @@ extern bool GOMP_loop_ordered_dynamic_st
+ extern bool GOMP_loop_ordered_guided_start (long, long, long, long,
+ 					    long *, long *);
+ extern bool GOMP_loop_ordered_runtime_start (long, long, long, long *, long *);
++extern bool GOMP_loop_ordered_start (long, long, long, long, long, long *,
++				     long *, uintptr_t *, void **);
+ 
+ extern bool GOMP_loop_static_next (long *, long *);
+ extern bool GOMP_loop_dynamic_next (long *, long *);
+@@ -71,6 +80,8 @@ extern bool GOMP_loop_guided_next (long
+ extern bool GOMP_loop_runtime_next (long *, long *);
+ extern bool GOMP_loop_nonmonotonic_dynamic_next (long *, long *);
+ extern bool GOMP_loop_nonmonotonic_guided_next (long *, long *);
++extern bool GOMP_loop_nonmonotonic_runtime_next (long *, long *);
++extern bool GOMP_loop_maybe_nonmonotonic_runtime_next (long *, long *);
+ 
+ extern bool GOMP_loop_ordered_static_next (long *, long *);
+ extern bool GOMP_loop_ordered_dynamic_next (long *, long *);
+@@ -85,6 +96,8 @@ extern bool GOMP_loop_doacross_guided_st
+ 					     long *);
+ extern bool GOMP_loop_doacross_runtime_start (unsigned, long *, long *,
+ 					      long *);
++extern bool GOMP_loop_doacross_start (unsigned, long *, long, long, long *,
++				      long *, uintptr_t *, void **);
+ 
+ extern void GOMP_parallel_loop_static_start (void (*)(void *), void *,
+ 					     unsigned, long, long, long, long);
+@@ -112,6 +125,13 @@ extern void GOMP_parallel_loop_nonmonoto
+ extern void GOMP_parallel_loop_nonmonotonic_guided (void (*)(void *), void *,
+ 						    unsigned, long, long,
+ 						    long, long, unsigned);
++extern void GOMP_parallel_loop_nonmonotonic_runtime (void (*)(void *), void *,
++						     unsigned, long, long,
++						     long, unsigned);
++extern void GOMP_parallel_loop_maybe_nonmonotonic_runtime (void (*)(void *),
++							   void *, unsigned,
++							   long, long,
++							   long, unsigned);
+ 
+ extern void GOMP_loop_end (void);
+ extern void GOMP_loop_end_nowait (void);
+@@ -154,6 +174,21 @@ extern bool GOMP_loop_ull_nonmonotonic_g
+ 						     unsigned long long,
+ 						     unsigned long long *,
+ 						     unsigned long long *);
++extern bool GOMP_loop_ull_nonmonotonic_runtime_start (bool, unsigned long long,
++						      unsigned long long,
++						      unsigned long long,
++						      unsigned long long *,
++						      unsigned long long *);
++extern bool GOMP_loop_ull_maybe_nonmonotonic_runtime_start (bool,
++							    unsigned long long,
++							    unsigned long long,
++							    unsigned long long,
++							    unsigned long long *,
++							    unsigned long long *);
++extern bool GOMP_loop_ull_start (bool, unsigned long long, unsigned long long,
++				 unsigned long long, long, unsigned long long,
++				 unsigned long long *, unsigned long long *,
++				 uintptr_t *, void **);
+ 
+ extern bool GOMP_loop_ull_ordered_static_start (bool, unsigned long long,
+ 						unsigned long long,
+@@ -178,6 +213,13 @@ extern bool GOMP_loop_ull_ordered_runtim
+ 						 unsigned long long,
+ 						 unsigned long long *,
+ 						 unsigned long long *);
++extern bool GOMP_loop_ull_ordered_start (bool, unsigned long long,
++					 unsigned long long,
++					 unsigned long long, long,
++					 unsigned long long,
++					 unsigned long long *,
++					 unsigned long long *,
++					 uintptr_t *, void **);
+ 
+ extern bool GOMP_loop_ull_static_next (unsigned long long *,
+ 				       unsigned long long *);
+@@ -191,6 +233,10 @@ extern bool GOMP_loop_ull_nonmonotonic_d
+ 						     unsigned long long *);
+ extern bool GOMP_loop_ull_nonmonotonic_guided_next (unsigned long long *,
+ 						    unsigned long long *);
++extern bool GOMP_loop_ull_nonmonotonic_runtime_next (unsigned long long *,
++						     unsigned long long *);
++extern bool GOMP_loop_ull_maybe_nonmonotonic_runtime_next (unsigned long long *,
++							   unsigned long long *);
+ 
+ extern bool GOMP_loop_ull_ordered_static_next (unsigned long long *,
+ 					       unsigned long long *);
+@@ -220,6 +266,11 @@ extern bool GOMP_loop_ull_doacross_runti
+ 						  unsigned long long *,
+ 						  unsigned long long *,
+ 						  unsigned long long *);
++extern bool GOMP_loop_ull_doacross_start (unsigned, unsigned long long *,
++					  long, unsigned long long,
++					  unsigned long long *,
++					  unsigned long long *,
++					  uintptr_t *, void **);
+ 
+ /* ordered.c */
+ 
+@@ -235,6 +286,8 @@ extern void GOMP_doacross_ull_wait (unsi
+ extern void GOMP_parallel_start (void (*) (void *), void *, unsigned);
+ extern void GOMP_parallel_end (void);
+ extern void GOMP_parallel (void (*) (void *), void *, unsigned, unsigned);
++extern unsigned GOMP_parallel_reductions (void (*) (void *), void *, unsigned,
++					  unsigned);
+ extern bool GOMP_cancel (int, bool);
+ extern bool GOMP_cancellation_point (int);
+ 
+@@ -251,13 +304,19 @@ extern void GOMP_taskloop_ull (void (*)
+ 			       unsigned long long, unsigned long long,
+ 			       unsigned long long);
+ extern void GOMP_taskwait (void);
++extern void GOMP_taskwait_depend (void **);
+ extern void GOMP_taskyield (void);
+ extern void GOMP_taskgroup_start (void);
+ extern void GOMP_taskgroup_end (void);
++extern void GOMP_taskgroup_reduction_register (uintptr_t *);
++extern void GOMP_taskgroup_reduction_unregister (uintptr_t *);
++extern void GOMP_task_reduction_remap (size_t, size_t, void **);
++extern void GOMP_workshare_task_reduction_unregister (bool);
+ 
+ /* sections.c */
+ 
+ extern unsigned GOMP_sections_start (unsigned);
++extern unsigned GOMP_sections2_start (unsigned, uintptr_t *, void **);
+ extern unsigned GOMP_sections_next (void);
+ extern void GOMP_parallel_sections_start (void (*) (void *), void *,
+ 					  unsigned, unsigned);
+@@ -293,6 +352,11 @@ extern void GOMP_target_enter_exit_data
+ 					 void **);
+ extern void GOMP_teams (unsigned int, unsigned int);
+ 
++/* teams.c */
++
++extern void GOMP_teams_reg (void (*) (void *), void *, unsigned, unsigned,
++			    unsigned);
++
+ /* oacc-parallel.c */
+ 
+ extern void GOACC_parallel_keyed (int, void (*) (void *), size_t,
+--- libgomp/affinity.c.jj	2018-04-25 09:40:31.913655581 +0200
++++ libgomp/affinity.c	2019-05-07 18:46:36.254114081 +0200
+@@ -26,6 +26,8 @@
+ /* This is a generic stub implementation of a CPU affinity setting.  */
+ 
+ #include "libgomp.h"
++#include <string.h>
++#include <stdio.h>
+ 
+ void
+ gomp_init_affinity (void)
+@@ -138,5 +140,17 @@ gomp_get_place_proc_ids_8 (int place_num
+   (void) ids;
+ }
+ 
++void /* Stub: emit "0" or "0-<gomp_available_cpus - 1>"; PLACE is unused.  */
++gomp_display_affinity_place (char *buffer, size_t size, size_t *ret,
++			     int place)
++{
++  char buf[sizeof (long) * 3 + 4]; /* Scratch for the formatted CPU range.  */
++  if (gomp_available_cpus > 1)
++    sprintf (buf, "0-%lu", gomp_available_cpus - 1);
++  else
++    strcpy (buf, "0");
++  gomp_display_string (buffer, size, ret, buf, strlen (buf)); /* Pass on text.  */
++}
++
+ ialias(omp_get_place_num_procs)
+ ialias(omp_get_place_proc_ids)
+--- libgomp/sections.c.jj	2018-04-25 09:40:31.924655586 +0200
++++ libgomp/sections.c	2019-05-07 18:46:36.535109592 +0200
+@@ -26,8 +26,11 @@
+ /* This file handles the SECTIONS construct.  */
+ 
+ #include "libgomp.h"
++#include <string.h>
+ 
+ 
++ialias_redirect (GOMP_taskgroup_reduction_register)
++
+ /* Initialize the given work share construct from the given arguments.  */
+ 
+ static inline void
+@@ -72,7 +75,7 @@ GOMP_sections_start (unsigned count)
+   struct gomp_thread *thr = gomp_thread ();
+   long s, e, ret;
+ 
+-  if (gomp_work_share_start (false))
++  if (gomp_work_share_start (0))
+     {
+       gomp_sections_init (thr->ts.work_share, count);
+       gomp_work_share_init_done ();
+@@ -95,6 +98,66 @@ GOMP_sections_start (unsigned count)
+   return ret;
+ }
+ 
++unsigned /* Sections start with optional REDUCTIONS and optional *MEM.  */
++GOMP_sections2_start (unsigned count, uintptr_t *reductions, void **mem)
++{
++  struct gomp_thread *thr = gomp_thread ();
++  long s, e, ret;
++
++  if (reductions)
++    gomp_workshare_taskgroup_start ();
++  if (gomp_work_share_start (0)) /* True: this thread sets up the work share.  */
++    {
++      gomp_sections_init (thr->ts.work_share, count);
++      if (reductions)
++	{
++	  GOMP_taskgroup_reduction_register (reductions);
++	  thr->task->taskgroup->workshare = true;
++	  thr->ts.work_share->task_reductions = reductions; /* Read in the else arm.  */
++	}
++      if (mem)
++	{
++	  uintptr_t size = (uintptr_t) *mem; /* On entry *MEM holds a byte count.  */
++	  if (size > (sizeof (struct gomp_work_share)
++		      - offsetof (struct gomp_work_share,
++				  inline_ordered_team_ids))) /* Exceeds struct tail.  */
++	    thr->ts.work_share->ordered_team_ids
++	      = gomp_malloc_cleared (size);
++	  else
++	    memset (thr->ts.work_share->ordered_team_ids, '\0', size);
++	  *mem = (void *) thr->ts.work_share->ordered_team_ids; /* Hand back buffer.  */
++	}
++      gomp_work_share_init_done ();
++    }
++  else
++    {
++      if (reductions)
++	{
++	  uintptr_t *first_reductions = thr->ts.work_share->task_reductions;
++	  gomp_workshare_task_reduction_register (reductions,
++						  first_reductions);
++	}
++      if (mem)
++	*mem = (void *) thr->ts.work_share->ordered_team_ids; /* Existing buffer.  */
++    }
++
++#ifdef HAVE_SYNC_BUILTINS
++  if (gomp_iter_dynamic_next (&s, &e))
++    ret = s; /* Return the start value S of the range handed out.  */
++  else
++    ret = 0; /* 0 when gomp_iter_dynamic_next returns false.  */
++#else
++  gomp_mutex_lock (&thr->ts.work_share->lock); /* Locked variant needs the lock.  */
++  if (gomp_iter_dynamic_next_locked (&s, &e))
++    ret = s;
++  else
++    ret = 0;
++  gomp_mutex_unlock (&thr->ts.work_share->lock);
++#endif
++
++  return ret;
++}
++
+ /* This routine is called when the thread completes processing of the
+    section currently assigned to it.  If the work-share construct is
+    bound directly to a parallel construct, then the construct may have
+@@ -140,7 +203,7 @@ GOMP_parallel_sections_start (void (*fn)
+   num_threads = gomp_resolve_num_threads (num_threads, count);
+   team = gomp_new_team (num_threads);
+   gomp_sections_init (&team->work_shares[0], count);
+-  gomp_team_start (fn, data, num_threads, 0, team);
++  gomp_team_start (fn, data, num_threads, 0, team, NULL);
+ }
+ 
+ ialias_redirect (GOMP_parallel_end)
+@@ -154,7 +217,7 @@ GOMP_parallel_sections (void (*fn) (void
+   num_threads = gomp_resolve_num_threads (num_threads, count);
+   team = gomp_new_team (num_threads);
+   gomp_sections_init (&team->work_shares[0], count);
+-  gomp_team_start (fn, data, num_threads, flags, team);
++  gomp_team_start (fn, data, num_threads, flags, team, NULL);
+   fn (data);
+   GOMP_parallel_end ();
+ }
+--- libgomp/config/linux/affinity.c.jj	2018-04-25 09:40:31.875655563 +0200
++++ libgomp/config/linux/affinity.c	2019-05-07 18:46:36.344112642 +0200
+@@ -396,6 +396,56 @@ gomp_get_place_proc_ids_8 (int place_num
+       *ids++ = i;
+ }
+ 
++void	/* Format PLACE's CPU list and hand it to gomp_display_string.  */
++gomp_display_affinity_place (char *buffer, size_t size, size_t *ret,
++			     int place)
++{
++  cpu_set_t *cpusetp;
++  char buf[sizeof (long) * 3 + 4];	/* One formatted piece at a time.  */
++  if (place >= 0 && place < gomp_places_list_len)
++    cpusetp = (cpu_set_t *) gomp_places_list[place];
++  else if (gomp_cpusetp)	/* PLACE out of range: use gomp_cpusetp.  */
++    cpusetp = gomp_cpusetp;
++  else	/* No mask at all: print 0 .. gomp_available_cpus - 1.  */
++    {
++      if (gomp_available_cpus > 1)
++	sprintf (buf, "0-%lu", gomp_available_cpus - 1);
++      else
++	strcpy (buf, "0");
++      gomp_display_string (buffer, size, ret, buf, strlen (buf));
++      return;
++    }
++  /* Emit each run of set bits as "a" or "a-b", separated by commas.  */
++  unsigned long i, max = 8 * gomp_cpuset_size, start;
++  bool prev_set = false;
++  start = max;	/* START == MAX means no run has been printed yet.  */
++  for (i = 0; i <= max; i++)
++    {
++      bool this_set;
++      if (i == max)	/* Extra pass closes a run ending at MAX - 1.  */
++	this_set = false;
++      else
++	this_set = CPU_ISSET_S (i, gomp_cpuset_size, cpusetp);
++      if (this_set != prev_set)
++	{
++	  prev_set = this_set;
++	  if (this_set)
++	    {
++	      char *p = buf;
++	      if (start != max)	/* Not the first run.  */
++		*p++ = ',';
++	      sprintf (p, "%lu", i);
++	      start = i;
++	    }
++	  else if (i == start + 1)	/* Single-CPU run: no "-N".  */
++	    continue;
++	  else
++	    sprintf (buf, "-%lu", i - 1);
++	  gomp_display_string (buffer, size, ret, buf, strlen (buf));
++	}
++    }
++}
++
+ ialias(omp_get_place_num_procs)
+ ialias(omp_get_place_proc_ids)
+ 
+--- libgomp/config/linux/ia64/futex.h.jj	2018-04-25 09:40:31.877655564 +0200
++++ libgomp/config/linux/ia64/futex.h	2019-05-07 18:46:36.344112642 +0200
+@@ -45,8 +45,8 @@ sys_futex0(int *addr, int op, int val)
+ 	  "=r"(r8), "=r"(r10)
+ 	: "r"(r15), "r"(out0), "r"(out1), "r"(out2), "r"(out3)
+ 	: "memory", "out4", "out5", "out6", "out7",
+-	  /* Non-stacked integer registers, minus r8, r10, r15.  */
+-	  "r2", "r3", "r9", "r11", "r12", "r13", "r14", "r16", "r17", "r18",
++	  /* Non-stacked integer registers, minus r8, r10, r12, r15.  */
++	  "r2", "r3", "r9", "r11", "r13", "r14", "r16", "r17", "r18",
+ 	  "r19", "r20", "r21", "r22", "r23", "r24", "r25", "r26", "r27",
+ 	  "r28", "r29", "r30", "r31",
+ 	  /* Predicate registers.  */
+--- libgomp/config/nvptx/teams.c.jj	2019-05-07 18:46:36.459110805 +0200
++++ libgomp/config/nvptx/teams.c	2019-05-07 18:46:36.459110805 +0200
+@@ -0,0 +1,57 @@
++/* Copyright (C) 2015-2019 Free Software Foundation, Inc.
++   Contributed by Alexander Monakov <amonakov@ispras.ru>
++
++   This file is part of the GNU Offloading and Multi Processing Library
++   (libgomp).
++
++   Libgomp is free software; you can redistribute it and/or modify it
++   under the terms of the GNU General Public License as published by
++   the Free Software Foundation; either version 3, or (at your option)
++   any later version.
++
++   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
++   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
++   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
++   more details.
++
++   Under Section 7 of GPL version 3, you are granted additional
++   permissions described in the GCC Runtime Library Exception, version
++   3.1, as published by the Free Software Foundation.
++
++   You should have received a copy of the GNU General Public License and
++   a copy of the GCC Runtime Library Exception along with this program;
++   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
++   <http://www.gnu.org/licenses/>.  */
++
++/* This file defines OpenMP API entry points that accelerator targets are
++   expected to replace.  */
++
++#include "libgomp.h"
++
++void	/* nvptx stub: does nothing with any of its arguments.  */
++GOMP_teams_reg (void (*fn) (void *), void *data, unsigned int num_teams,
++		unsigned int thread_limit, unsigned int flags)
++{
++  (void) fn;	/* FN is never called here.  */
++  (void) data;
++  (void) flags;
++  (void) num_teams;
++  (void) thread_limit;
++}
++
++int
++omp_get_num_teams (void)
++{
++  return gomp_num_teams_var + 1;	/* GOMP_teams stores num_teams - 1.  */
++}
++
++int
++omp_get_team_num (void)
++{
++  int ctaid;
++  asm ("mov.u32 %0, %%ctaid.x;" : "=r" (ctaid));	/* Read PTX %ctaid.x.  */
++  return ctaid;	/* The team number is the value of %ctaid.x.  */
++}
++
++ialias (omp_get_num_teams)
++ialias (omp_get_team_num)
+--- libgomp/config/nvptx/team.c.jj	2018-04-25 09:40:31.890655570 +0200
++++ libgomp/config/nvptx/team.c	2019-05-07 18:46:36.459110805 +0200
+@@ -116,7 +116,8 @@ gomp_thread_start (struct gomp_thread_po
+ 
+ void
+ gomp_team_start (void (*fn) (void *), void *data, unsigned nthreads,
+-		 unsigned flags, struct gomp_team *team)
++		 unsigned flags, struct gomp_team *team,
++		 struct gomp_taskgroup *taskgroup)
+ {
+   struct gomp_thread *thr, *nthr;
+   struct gomp_task *task;
+@@ -147,6 +148,7 @@ gomp_team_start (void (*fn) (void *), vo
+   nthreads_var = icv->nthreads_var;
+   gomp_init_task (thr->task, task, icv);
+   team->implicit_task[0].icv.nthreads_var = nthreads_var;
++  team->implicit_task[0].taskgroup = taskgroup;
+ 
+   if (nthreads == 1)
+     return;
+@@ -166,6 +168,7 @@ gomp_team_start (void (*fn) (void *), vo
+       nthr->task = &team->implicit_task[i];
+       gomp_init_task (nthr->task, task, icv);
+       team->implicit_task[i].icv.nthreads_var = nthreads_var;
++      team->implicit_task[i].taskgroup = taskgroup;
+       nthr->fn = fn;
+       nthr->data = data;
+       team->ordered_release[i] = &nthr->release;
+@@ -174,5 +177,11 @@ gomp_team_start (void (*fn) (void *), vo
+   gomp_simple_barrier_wait (&pool->threads_dock);
+ }
+ 
++int
++gomp_pause_host (void)
++{
++  return -1;	/* Unconditional on nvptx; presumably "not supported".  */
++}
++
+ #include "../../team.c"
+ #endif
+--- libgomp/config/nvptx/oacc-parallel.c.jj	2018-04-25 09:40:31.887655569 +0200
++++ libgomp/config/nvptx/oacc-parallel.c	2019-05-07 18:46:36.453110901 +0200
+@@ -1,358 +0,0 @@
+-/* OpenACC constructs
+-
+-   Copyright (C) 2014-2018 Free Software Foundation, Inc.
+-
+-   Contributed by Mentor Embedded.
+-
+-   This file is part of the GNU Offloading and Multi Processing Library
+-   (libgomp).
+-
+-   Libgomp is free software; you can redistribute it and/or modify it
+-   under the terms of the GNU General Public License as published by
+-   the Free Software Foundation; either version 3, or (at your option)
+-   any later version.
+-
+-   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
+-   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+-   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+-   more details.
+-
+-   Under Section 7 of GPL version 3, you are granted additional
+-   permissions described in the GCC Runtime Library Exception, version
+-   3.1, as published by the Free Software Foundation.
+-
+-   You should have received a copy of the GNU General Public License and
+-   a copy of the GCC Runtime Library Exception along with this program;
+-   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+-   <http://www.gnu.org/licenses/>.  */
+-
+-#include "libgomp_g.h"
+-
+-__asm__ (".visible .func (.param .u32 %out_retval) GOACC_tid (.param .u32 %in_ar1);\n"
+-	 ".visible .func (.param .u32 %out_retval) GOACC_ntid (.param .u32 %in_ar1);\n"
+-	 ".visible .func (.param .u32 %out_retval) GOACC_ctaid (.param .u32 %in_ar1);\n"
+-	 ".visible .func (.param .u32 %out_retval) GOACC_nctaid (.param .u32 %in_ar1);\n"
+-	 "// BEGIN GLOBAL FUNCTION DECL: GOACC_get_num_threads\n"
+-	 ".visible .func (.param .u32 %out_retval) GOACC_get_num_threads;\n"
+-	 "// BEGIN GLOBAL FUNCTION DECL: GOACC_get_thread_num\n"
+-	 ".visible .func (.param .u32 %out_retval) GOACC_get_thread_num;\n"
+-	 "// BEGIN GLOBAL FUNCTION DECL: abort\n"
+-	 ".extern .func abort;\n"
+-	 ".visible .func (.param .u32 %out_retval) GOACC_tid (.param .u32 %in_ar1)\n"
+-	 "{\n"
+-	 ".reg .u32 %ar1;\n"
+-	 ".reg .u32 %retval;\n"
+-	 ".reg .u64 %hr10;\n"
+-	 ".reg .u32 %r22;\n"
+-	 ".reg .u32 %r23;\n"
+-	 ".reg .u32 %r24;\n"
+-	 ".reg .u32 %r25;\n"
+-	 ".reg .u32 %r26;\n"
+-	 ".reg .u32 %r27;\n"
+-	 ".reg .u32 %r28;\n"
+-	 ".reg .u32 %r29;\n"
+-	 ".reg .pred %r30;\n"
+-	 ".reg .u32 %r31;\n"
+-	 ".reg .pred %r32;\n"
+-	 ".reg .u32 %r33;\n"
+-	 ".reg .pred %r34;\n"
+-	 ".local .align 8 .b8 %frame[4];\n"
+-	 "ld.param.u32 %ar1,[%in_ar1];\n"
+-	 "mov.u32 %r27,%ar1;\n"
+-	 "st.local.u32 [%frame],%r27;\n"
+-	 "ld.local.u32 %r28,[%frame];\n"
+-	 "mov.u32 %r29,1;\n"
+-	 "setp.eq.u32 %r30,%r28,%r29;\n"
+-	 "@%r30 bra $L4;\n"
+-	 "mov.u32 %r31,2;\n"
+-	 "setp.eq.u32 %r32,%r28,%r31;\n"
+-	 "@%r32 bra $L5;\n"
+-	 "mov.u32 %r33,0;\n"
+-	 "setp.eq.u32 %r34,%r28,%r33;\n"
+-	 "@!%r34 bra $L8;\n"
+-	 "mov.u32 %r23,%tid.x;\n"
+-	 "mov.u32 %r22,%r23;\n"
+-	 "bra $L7;\n"
+-	 "$L4:\n"
+-	 "mov.u32 %r24,%tid.y;\n"
+-	 "mov.u32 %r22,%r24;\n"
+-	 "bra $L7;\n"
+-	 "$L5:\n"
+-	 "mov.u32 %r25,%tid.z;\n"
+-	 "mov.u32 %r22,%r25;\n"
+-	 "bra $L7;\n"
+-	 "$L8:\n"
+-	 "{\n"
+-	 "{\n"
+-	 "call abort;\n"
+-	 "}\n"
+-	 "}\n"
+-	 "$L7:\n"
+-	 "mov.u32 %r26,%r22;\n"
+-	 "mov.u32 %retval,%r26;\n"
+-	 "st.param.u32 [%out_retval],%retval;\n"
+-	 "ret;\n"
+-	 "}\n"
+-	 ".visible .func (.param .u32 %out_retval) GOACC_ntid (.param .u32 %in_ar1)\n"
+-	 "{\n"
+-	 ".reg .u32 %ar1;\n"
+-	 ".reg .u32 %retval;\n"
+-	 ".reg .u64 %hr10;\n"
+-	 ".reg .u32 %r22;\n"
+-	 ".reg .u32 %r23;\n"
+-	 ".reg .u32 %r24;\n"
+-	 ".reg .u32 %r25;\n"
+-	 ".reg .u32 %r26;\n"
+-	 ".reg .u32 %r27;\n"
+-	 ".reg .u32 %r28;\n"
+-	 ".reg .u32 %r29;\n"
+-	 ".reg .pred %r30;\n"
+-	 ".reg .u32 %r31;\n"
+-	 ".reg .pred %r32;\n"
+-	 ".reg .u32 %r33;\n"
+-	 ".reg .pred %r34;\n"
+-	 ".local .align 8 .b8 %frame[4];\n"
+-	 "ld.param.u32 %ar1,[%in_ar1];\n"
+-	 "mov.u32 %r27,%ar1;\n"
+-	 "st.local.u32 [%frame],%r27;\n"
+-	 "ld.local.u32 %r28,[%frame];\n"
+-	 "mov.u32 %r29,1;\n"
+-	 "setp.eq.u32 %r30,%r28,%r29;\n"
+-	 "@%r30 bra $L11;\n"
+-	 "mov.u32 %r31,2;\n"
+-	 "setp.eq.u32 %r32,%r28,%r31;\n"
+-	 "@%r32 bra $L12;\n"
+-	 "mov.u32 %r33,0;\n"
+-	 "setp.eq.u32 %r34,%r28,%r33;\n"
+-	 "@!%r34 bra $L15;\n"
+-	 "mov.u32 %r23,%ntid.x;\n"
+-	 "mov.u32 %r22,%r23;\n"
+-	 "bra $L14;\n"
+-	 "$L11:\n"
+-	 "mov.u32 %r24,%ntid.y;\n"
+-	 "mov.u32 %r22,%r24;\n"
+-	 "bra $L14;\n"
+-	 "$L12:\n"
+-	 "mov.u32 %r25,%ntid.z;\n"
+-	 "mov.u32 %r22,%r25;\n"
+-	 "bra $L14;\n"
+-	 "$L15:\n"
+-	 "{\n"
+-	 "{\n"
+-	 "call abort;\n"
+-	 "}\n"
+-	 "}\n"
+-	 "$L14:\n"
+-	 "mov.u32 %r26,%r22;\n"
+-	 "mov.u32 %retval,%r26;\n"
+-	 "st.param.u32 [%out_retval],%retval;\n"
+-	 "ret;\n"
+-	 "}\n"
+-	 ".visible .func (.param .u32 %out_retval) GOACC_ctaid (.param .u32 %in_ar1)\n"
+-	 "{\n"
+-	 ".reg .u32 %ar1;\n"
+-	 ".reg .u32 %retval;\n"
+-	 ".reg .u64 %hr10;\n"
+-	 ".reg .u32 %r22;\n"
+-	 ".reg .u32 %r23;\n"
+-	 ".reg .u32 %r24;\n"
+-	 ".reg .u32 %r25;\n"
+-	 ".reg .u32 %r26;\n"
+-	 ".reg .u32 %r27;\n"
+-	 ".reg .u32 %r28;\n"
+-	 ".reg .u32 %r29;\n"
+-	 ".reg .pred %r30;\n"
+-	 ".reg .u32 %r31;\n"
+-	 ".reg .pred %r32;\n"
+-	 ".reg .u32 %r33;\n"
+-	 ".reg .pred %r34;\n"
+-	 ".local .align 8 .b8 %frame[4];\n"
+-	 "ld.param.u32 %ar1,[%in_ar1];\n"
+-	 "mov.u32 %r27,%ar1;\n"
+-	 "st.local.u32 [%frame],%r27;\n"
+-	 "ld.local.u32 %r28,[%frame];\n"
+-	 "mov.u32 %r29,1;\n"
+-	 "setp.eq.u32 %r30,%r28,%r29;\n"
+-	 "@%r30 bra $L18;\n"
+-	 "mov.u32 %r31,2;\n"
+-	 "setp.eq.u32 %r32,%r28,%r31;\n"
+-	 "@%r32 bra $L19;\n"
+-	 "mov.u32 %r33,0;\n"
+-	 "setp.eq.u32 %r34,%r28,%r33;\n"
+-	 "@!%r34 bra $L22;\n"
+-	 "mov.u32 %r23,%ctaid.x;\n"
+-	 "mov.u32 %r22,%r23;\n"
+-	 "bra $L21;\n"
+-	 "$L18:\n"
+-	 "mov.u32 %r24,%ctaid.y;\n"
+-	 "mov.u32 %r22,%r24;\n"
+-	 "bra $L21;\n"
+-	 "$L19:\n"
+-	 "mov.u32 %r25,%ctaid.z;\n"
+-	 "mov.u32 %r22,%r25;\n"
+-	 "bra $L21;\n"
+-	 "$L22:\n"
+-	 "{\n"
+-	 "{\n"
+-	 "call abort;\n"
+-	 "}\n"
+-	 "}\n"
+-	 "$L21:\n"
+-	 "mov.u32 %r26,%r22;\n"
+-	 "mov.u32 %retval,%r26;\n"
+-	 "st.param.u32 [%out_retval],%retval;\n"
+-	 "ret;\n"
+-	 "}\n"
+-	 ".visible .func (.param .u32 %out_retval) GOACC_nctaid (.param .u32 %in_ar1)\n"
+-	 "{\n"
+-	 ".reg .u32 %ar1;\n"
+-	 ".reg .u32 %retval;\n"
+-	 ".reg .u64 %hr10;\n"
+-	 ".reg .u32 %r22;\n"
+-	 ".reg .u32 %r23;\n"
+-	 ".reg .u32 %r24;\n"
+-	 ".reg .u32 %r25;\n"
+-	 ".reg .u32 %r26;\n"
+-	 ".reg .u32 %r27;\n"
+-	 ".reg .u32 %r28;\n"
+-	 ".reg .u32 %r29;\n"
+-	 ".reg .pred %r30;\n"
+-	 ".reg .u32 %r31;\n"
+-	 ".reg .pred %r32;\n"
+-	 ".reg .u32 %r33;\n"
+-	 ".reg .pred %r34;\n"
+-	 ".local .align 8 .b8 %frame[4];\n"
+-	 "ld.param.u32 %ar1,[%in_ar1];\n"
+-	 "mov.u32 %r27,%ar1;\n"
+-	 "st.local.u32 [%frame],%r27;\n"
+-	 "ld.local.u32 %r28,[%frame];\n"
+-	 "mov.u32 %r29,1;\n"
+-	 "setp.eq.u32 %r30,%r28,%r29;\n"
+-	 "@%r30 bra $L25;\n"
+-	 "mov.u32 %r31,2;\n"
+-	 "setp.eq.u32 %r32,%r28,%r31;\n"
+-	 "@%r32 bra $L26;\n"
+-	 "mov.u32 %r33,0;\n"
+-	 "setp.eq.u32 %r34,%r28,%r33;\n"
+-	 "@!%r34 bra $L29;\n"
+-	 "mov.u32 %r23,%nctaid.x;\n"
+-	 "mov.u32 %r22,%r23;\n"
+-	 "bra $L28;\n"
+-	 "$L25:\n"
+-	 "mov.u32 %r24,%nctaid.y;\n"
+-	 "mov.u32 %r22,%r24;\n"
+-	 "bra $L28;\n"
+-	 "$L26:\n"
+-	 "mov.u32 %r25,%nctaid.z;\n"
+-	 "mov.u32 %r22,%r25;\n"
+-	 "bra $L28;\n"
+-	 "$L29:\n"
+-	 "{\n"
+-	 "{\n"
+-	 "call abort;\n"
+-	 "}\n"
+-	 "}\n"
+-	 "$L28:\n"
+-	 "mov.u32 %r26,%r22;\n"
+-	 "mov.u32 %retval,%r26;\n"
+-	 "st.param.u32 [%out_retval],%retval;\n"
+-	 "ret;\n"
+-	 "}\n"
+-	 "// BEGIN GLOBAL FUNCTION DEF: GOACC_get_num_threads\n"
+-	 ".visible .func (.param .u32 %out_retval) GOACC_get_num_threads\n"
+-	 "{\n"
+-	 ".reg .u32 %retval;\n"
+-	 ".reg .u64 %hr10;\n"
+-	 ".reg .u32 %r22;\n"
+-	 ".reg .u32 %r23;\n"
+-	 ".reg .u32 %r24;\n"
+-	 ".reg .u32 %r25;\n"
+-	 ".reg .u32 %r26;\n"
+-	 ".reg .u32 %r27;\n"
+-	 ".reg .u32 %r28;\n"
+-	 ".reg .u32 %r29;\n"
+-	 "mov.u32 %r26,0;\n"
+-	 "{\n"
+-	 ".param .u32 %retval_in;\n"
+-	 "{\n"
+-	 ".param .u32 %out_arg0;\n"
+-	 "st.param.u32 [%out_arg0],%r26;\n"
+-	 "call (%retval_in),GOACC_ntid,(%out_arg0);\n"
+-	 "}\n"
+-	 "ld.param.u32 %r27,[%retval_in];\n"
+-	 "}\n"
+-	 "mov.u32 %r22,%r27;\n"
+-	 "mov.u32 %r28,0;\n"
+-	 "{\n"
+-	 ".param .u32 %retval_in;\n"
+-	 "{\n"
+-	 ".param .u32 %out_arg0;\n"
+-	 "st.param.u32 [%out_arg0],%r28;\n"
+-	 "call (%retval_in),GOACC_nctaid,(%out_arg0);\n"
+-	 "}\n"
+-	 "ld.param.u32 %r29,[%retval_in];\n"
+-	 "}\n"
+-	 "mov.u32 %r23,%r29;\n"
+-	 "mul.lo.u32 %r24,%r22,%r23;\n"
+-	 "mov.u32 %r25,%r24;\n"
+-	 "mov.u32 %retval,%r25;\n"
+-	 "st.param.u32 [%out_retval],%retval;\n"
+-	 "ret;\n"
+-	 "}\n"
+-	 "// BEGIN GLOBAL FUNCTION DEF: GOACC_get_thread_num\n"
+-	 ".visible .func (.param .u32 %out_retval) GOACC_get_thread_num\n"
+-	 "{\n"
+-	 ".reg .u32 %retval;\n"
+-	 ".reg .u64 %hr10;\n"
+-	 ".reg .u32 %r22;\n"
+-	 ".reg .u32 %r23;\n"
+-	 ".reg .u32 %r24;\n"
+-	 ".reg .u32 %r25;\n"
+-	 ".reg .u32 %r26;\n"
+-	 ".reg .u32 %r27;\n"
+-	 ".reg .u32 %r28;\n"
+-	 ".reg .u32 %r29;\n"
+-	 ".reg .u32 %r30;\n"
+-	 ".reg .u32 %r31;\n"
+-	 ".reg .u32 %r32;\n"
+-	 ".reg .u32 %r33;\n"
+-	 "mov.u32 %r28,0;\n"
+-	 "{\n"
+-	 ".param .u32 %retval_in;\n"
+-	 "{\n"
+-	 ".param .u32 %out_arg0;\n"
+-	 "st.param.u32 [%out_arg0],%r28;\n"
+-	 "call (%retval_in),GOACC_ntid,(%out_arg0);\n"
+-	 "}\n"
+-	 "ld.param.u32 %r29,[%retval_in];\n"
+-	 "}\n"
+-	 "mov.u32 %r22,%r29;\n"
+-	 "mov.u32 %r30,0;\n"
+-	 "{\n"
+-	 ".param .u32 %retval_in;\n"
+-	 "{\n"
+-	 ".param .u32 %out_arg0;\n"
+-	 "st.param.u32 [%out_arg0],%r30;\n"
+-	 "call (%retval_in),GOACC_ctaid,(%out_arg0);\n"
+-	 "}\n"
+-	 "ld.param.u32 %r31,[%retval_in];\n"
+-	 "}\n"
+-	 "mov.u32 %r23,%r31;\n"
+-	 "mul.lo.u32 %r24,%r22,%r23;\n"
+-	 "mov.u32 %r32,0;\n"
+-	 "{\n"
+-	 ".param .u32 %retval_in;\n"
+-	 "{\n"
+-	 ".param .u32 %out_arg0;\n"
+-	 "st.param.u32 [%out_arg0],%r32;\n"
+-	 "call (%retval_in),GOACC_tid,(%out_arg0);\n"
+-	 "}\n"
+-	 "ld.param.u32 %r33,[%retval_in];\n"
+-	 "}\n"
+-	 "mov.u32 %r25,%r33;\n"
+-	 "add.u32 %r26,%r24,%r25;\n"
+-	 "mov.u32 %r27,%r26;\n"
+-	 "mov.u32 %retval,%r27;\n"
+-	 "st.param.u32 [%out_retval],%retval;\n"
+-	 "ret;\n"
+-	 "}\n");
+--- libgomp/config/nvptx/target.c.jj	2018-04-25 09:40:31.890655570 +0200
++++ libgomp/config/nvptx/target.c	2019-05-07 18:46:36.453110901 +0200
+@@ -47,3 +47,21 @@ GOMP_teams (unsigned int num_teams, unsi
+     }
+   gomp_num_teams_var = num_teams - 1;
+ }
++
++int
++omp_pause_resource (omp_pause_resource_t kind, int device_num)
++{
++  (void) kind;	/* Both arguments are ignored in this nvptx stub.  */
++  (void) device_num;
++  return -1;	/* Always nonzero; no resource is paused here.  */
++}
++
++int
++omp_pause_resource_all (omp_pause_resource_t kind)
++{
++  (void) kind;	/* Ignored in this nvptx stub.  */
++  return -1;	/* Always nonzero; no resource is paused here.  */
++}
++
++ialias (omp_pause_resource)
++ialias (omp_pause_resource_all)
+--- libgomp/config/nvptx/icv-device.c.jj	2018-04-25 09:40:31.889655570 +0200
++++ libgomp/config/nvptx/icv-device.c	2019-05-07 18:46:36.453110901 +0200
+@@ -46,20 +46,6 @@ omp_get_num_devices (void)
+ }
+ 
+ int
+-omp_get_num_teams (void)
+-{
+-  return gomp_num_teams_var + 1;
+-}
+-
+-int
+-omp_get_team_num (void)
+-{
+-  int ctaid;
+-  asm ("mov.u32 %0, %%ctaid.x;" : "=r" (ctaid));
+-  return ctaid;
+-}
+-
+-int
+ omp_is_initial_device (void)
+ {
+   /* NVPTX is an accelerator-only target.  */
+@@ -69,6 +55,4 @@ omp_is_initial_device (void)
+ ialias (omp_set_default_device)
+ ialias (omp_get_default_device)
+ ialias (omp_get_num_devices)
+-ialias (omp_get_num_teams)
+-ialias (omp_get_team_num)
+ ialias (omp_is_initial_device)
+--- libgomp/config/nvptx/affinity-fmt.c.jj	2019-05-07 18:46:36.358112419 +0200
++++ libgomp/config/nvptx/affinity-fmt.c	2019-05-07 18:46:36.358112419 +0200
+@@ -0,0 +1,51 @@
++/* Copyright (C) 2018-2019 Free Software Foundation, Inc.
++
++   This file is part of the GNU Offloading and Multi Processing Library
++   (libgomp).
++
++   Libgomp is free software; you can redistribute it and/or modify it
++   under the terms of the GNU General Public License as published by
++   the Free Software Foundation; either version 3, or (at your option)
++   any later version.
++
++   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
++   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
++   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
++   more details.
++
++   Under Section 7 of GPL version 3, you are granted additional
++   permissions described in the GCC Runtime Library Exception, version
++   3.1, as published by the Free Software Foundation.
++
++   You should have received a copy of the GNU General Public License and
++   a copy of the GCC Runtime Library Exception along with this program;
++   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
++   <http://www.gnu.org/licenses/>.  */
++
++#include "libgomp.h"
++#include <string.h>
++#include <stdio.h>
++#include <stdlib.h>
++#ifdef HAVE_UNISTD_H
++#include <unistd.h>
++#endif
++#ifdef HAVE_INTTYPES_H
++# include <inttypes.h>  /* For PRIx64.  */
++#endif
++#ifdef HAVE_UNAME
++#include <sys/utsname.h>
++#endif
++
++/* The HAVE_GETPID and HAVE_GETHOSTNAME configure tests are passing for nvptx,
++   while the nvptx newlib implementation does not support those functions.
++   Override the configure test results here.  */
++#undef HAVE_GETPID
++#undef HAVE_GETHOSTNAME
++
++/* The nvptx newlib implementation does not support fwrite, but it does support
++   write.  Map fwrite to write on fd 1; the STREAM argument is ignored.  */
++#undef fwrite
++#define fwrite(ptr, size, nmemb, stream) write (1, (ptr), (nmemb) * (size))
++
++#include "../../affinity-fmt.c"
++
+--- libgomp/config/mingw32/affinity-fmt.c.jj	2019-05-07 18:46:36.344112642 +0200
++++ libgomp/config/mingw32/affinity-fmt.c	2019-05-07 18:46:36.344112642 +0200
+@@ -0,0 +1,68 @@
++/* Copyright (C) 2018-2019 Free Software Foundation, Inc.
++   Contributed by Jakub Jelinek <jakub@redhat.com>.
++
++   This file is part of the GNU Offloading and Multi Processing Library
++   (libgomp).
++
++   Libgomp is free software; you can redistribute it and/or modify it
++   under the terms of the GNU General Public License as published by
++   the Free Software Foundation; either version 3, or (at your option)
++   any later version.
++
++   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
++   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
++   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
++   more details.
++
++   Under Section 7 of GPL version 3, you are granted additional
++   permissions described in the GCC Runtime Library Exception, version
++   3.1, as published by the Free Software Foundation.
++
++   You should have received a copy of the GNU General Public License and
++   a copy of the GCC Runtime Library Exception along with this program;
++   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
++   <http://www.gnu.org/licenses/>.  */
++
++#include "libgomp.h"
++#include <string.h>
++#include <stdio.h>
++#include <stdlib.h>
++#ifdef HAVE_UNISTD_H
++#include <unistd.h>
++#endif
++#ifdef HAVE_INTTYPES_H
++# include <inttypes.h>  /* For PRIx64.  */
++#endif
++#define WIN32_LEAN_AND_MEAN
++#include <windows.h>
++#include <errno.h>
++
++static int	/* gethostname stand-in built on GetComputerName.  */
++gomp_gethostname (char *name, size_t len)
++{
++  /* On Win9x GetComputerName fails if the input size is less
++     than MAX_COMPUTERNAME_LENGTH + 1.  */
++  char buffer[MAX_COMPUTERNAME_LENGTH + 1];
++  DWORD size = sizeof (buffer);
++  int ret = 0;	/* Becomes -1 when NAME is too small for the result.  */
++
++  if (!GetComputerName (buffer, &size))
++    return -1;
++
++  if ((size = strlen (buffer) + 1) > len)	/* +1 counts the NUL.  */
++    {
++      errno = EINVAL;
++      /* Truncate as per POSIX spec.  We do not NUL-terminate.  */
++      size = len;
++      ret = -1;
++    }
++  memcpy (name, buffer, (size_t) size);	/* At most LEN bytes.  */
++
++  return ret;
++}
++
++#undef gethostname
++#define gethostname gomp_gethostname
++#define  HAVE_GETHOSTNAME 1
++
++#include "../../affinity-fmt.c"
+--- libgomp/config/rtems/bar.c.jj	2018-04-25 09:40:31.902655576 +0200
++++ libgomp/config/rtems/bar.c	2019-05-07 18:46:36.460110789 +0200
+@@ -72,184 +72,5 @@ do_wait (int *addr, int val)
+     futex_wait (addr, val);
+ }
+ 
+-/* Everything below this point should be identical to the Linux
+-   implementation.  */
+-
+-void
+-gomp_barrier_wait_end (gomp_barrier_t *bar, gomp_barrier_state_t state)
+-{
+-  if (__builtin_expect (state & BAR_WAS_LAST, 0))
+-    {
+-      /* Next time we'll be awaiting TOTAL threads again.  */
+-      bar->awaited = bar->total;
+-      __atomic_store_n (&bar->generation, bar->generation + BAR_INCR,
+-			MEMMODEL_RELEASE);
+-      futex_wake ((int *) &bar->generation, INT_MAX);
+-    }
+-  else
+-    {
+-      do
+-	do_wait ((int *) &bar->generation, state);
+-      while (__atomic_load_n (&bar->generation, MEMMODEL_ACQUIRE) == state);
+-    }
+-}
+-
+-void
+-gomp_barrier_wait (gomp_barrier_t *bar)
+-{
+-  gomp_barrier_wait_end (bar, gomp_barrier_wait_start (bar));
+-}
+-
+-/* Like gomp_barrier_wait, except that if the encountering thread
+-   is not the last one to hit the barrier, it returns immediately.
+-   The intended usage is that a thread which intends to gomp_barrier_destroy
+-   this barrier calls gomp_barrier_wait, while all other threads
+-   call gomp_barrier_wait_last.  When gomp_barrier_wait returns,
+-   the barrier can be safely destroyed.  */
+-
+-void
+-gomp_barrier_wait_last (gomp_barrier_t *bar)
+-{
+-  gomp_barrier_state_t state = gomp_barrier_wait_start (bar);
+-  if (state & BAR_WAS_LAST)
+-    gomp_barrier_wait_end (bar, state);
+-}
+-
+-void
+-gomp_team_barrier_wake (gomp_barrier_t *bar, int count)
+-{
+-  futex_wake ((int *) &bar->generation, count == 0 ? INT_MAX : count);
+-}
+-
+-void
+-gomp_team_barrier_wait_end (gomp_barrier_t *bar, gomp_barrier_state_t state)
+-{
+-  unsigned int generation, gen;
+-
+-  if (__builtin_expect (state & BAR_WAS_LAST, 0))
+-    {
+-      /* Next time we'll be awaiting TOTAL threads again.  */
+-      struct gomp_thread *thr = gomp_thread ();
+-      struct gomp_team *team = thr->ts.team;
+-
+-      bar->awaited = bar->total;
+-      team->work_share_cancelled = 0;
+-      if (__builtin_expect (team->task_count, 0))
+-	{
+-	  gomp_barrier_handle_tasks (state);
+-	  state &= ~BAR_WAS_LAST;
+-	}
+-      else
+-	{
+-	  state &= ~BAR_CANCELLED;
+-	  state += BAR_INCR - BAR_WAS_LAST;
+-	  __atomic_store_n (&bar->generation, state, MEMMODEL_RELEASE);
+-	  futex_wake ((int *) &bar->generation, INT_MAX);
+-	  return;
+-	}
+-    }
+-
+-  generation = state;
+-  state &= ~BAR_CANCELLED;
+-  do
+-    {
+-      do_wait ((int *) &bar->generation, generation);
+-      gen = __atomic_load_n (&bar->generation, MEMMODEL_ACQUIRE);
+-      if (__builtin_expect (gen & BAR_TASK_PENDING, 0))
+-	{
+-	  gomp_barrier_handle_tasks (state);
+-	  gen = __atomic_load_n (&bar->generation, MEMMODEL_ACQUIRE);
+-	}
+-      generation |= gen & BAR_WAITING_FOR_TASK;
+-    }
+-  while (gen != state + BAR_INCR);
+-}
+-
+-void
+-gomp_team_barrier_wait (gomp_barrier_t *bar)
+-{
+-  gomp_team_barrier_wait_end (bar, gomp_barrier_wait_start (bar));
+-}
+-
+-void
+-gomp_team_barrier_wait_final (gomp_barrier_t *bar)
+-{
+-  gomp_barrier_state_t state = gomp_barrier_wait_final_start (bar);
+-  if (__builtin_expect (state & BAR_WAS_LAST, 0))
+-    bar->awaited_final = bar->total;
+-  gomp_team_barrier_wait_end (bar, state);
+-}
+-
+-bool
+-gomp_team_barrier_wait_cancel_end (gomp_barrier_t *bar,
+-				   gomp_barrier_state_t state)
+-{
+-  unsigned int generation, gen;
+-
+-  if (__builtin_expect (state & BAR_WAS_LAST, 0))
+-    {
+-      /* Next time we'll be awaiting TOTAL threads again.  */
+-      /* BAR_CANCELLED should never be set in state here, because
+-	 cancellation means that at least one of the threads has been
+-	 cancelled, thus on a cancellable barrier we should never see
+-	 all threads to arrive.  */
+-      struct gomp_thread *thr = gomp_thread ();
+-      struct gomp_team *team = thr->ts.team;
+-
+-      bar->awaited = bar->total;
+-      team->work_share_cancelled = 0;
+-      if (__builtin_expect (team->task_count, 0))
+-	{
+-	  gomp_barrier_handle_tasks (state);
+-	  state &= ~BAR_WAS_LAST;
+-	}
+-      else
+-	{
+-	  state += BAR_INCR - BAR_WAS_LAST;
+-	  __atomic_store_n (&bar->generation, state, MEMMODEL_RELEASE);
+-	  futex_wake ((int *) &bar->generation, INT_MAX);
+-	  return false;
+-	}
+-    }
+-
+-  if (__builtin_expect (state & BAR_CANCELLED, 0))
+-    return true;
+-
+-  generation = state;
+-  do
+-    {
+-      do_wait ((int *) &bar->generation, generation);
+-      gen = __atomic_load_n (&bar->generation, MEMMODEL_ACQUIRE);
+-      if (__builtin_expect (gen & BAR_CANCELLED, 0))
+-	return true;
+-      if (__builtin_expect (gen & BAR_TASK_PENDING, 0))
+-	{
+-	  gomp_barrier_handle_tasks (state);
+-	  gen = __atomic_load_n (&bar->generation, MEMMODEL_ACQUIRE);
+-	}
+-      generation |= gen & BAR_WAITING_FOR_TASK;
+-    }
+-  while (gen != state + BAR_INCR);
+-
+-  return false;
+-}
+-
+-bool
+-gomp_team_barrier_wait_cancel (gomp_barrier_t *bar)
+-{
+-  return gomp_team_barrier_wait_cancel_end (bar, gomp_barrier_wait_start (bar));
+-}
+-
+-void
+-gomp_team_barrier_cancel (struct gomp_team *team)
+-{
+-  gomp_mutex_lock (&team->task_lock);
+-  if (team->barrier.generation & BAR_CANCELLED)
+-    {
+-      gomp_mutex_unlock (&team->task_lock);
+-      return;
+-    }
+-  team->barrier.generation |= BAR_CANCELLED;
+-  gomp_mutex_unlock (&team->task_lock);
+-  futex_wake ((int *) &team->barrier.generation, INT_MAX);
+-}
++#define GOMP_WAIT_H 1
++#include "../linux/bar.c"
+--- libgomp/config/rtems/affinity-fmt.c.jj	2019-05-07 18:46:36.459110805 +0200
++++ libgomp/config/rtems/affinity-fmt.c	2019-05-07 18:46:36.459110805 +0200
+@@ -0,0 +1,49 @@
++/* Copyright (C) 2018-2019 Free Software Foundation, Inc.
++
++   This file is part of the GNU Offloading and Multi Processing Library
++   (libgomp).
++
++   Libgomp is free software; you can redistribute it and/or modify it
++   under the terms of the GNU General Public License as published by
++   the Free Software Foundation; either version 3, or (at your option)
++   any later version.
++
++   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
++   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
++   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
++   more details.
++
++   Under Section 7 of GPL version 3, you are granted additional
++   permissions described in the GCC Runtime Library Exception, version
++   3.1, as published by the Free Software Foundation.
++
++   You should have received a copy of the GNU General Public License and
++   a copy of the GCC Runtime Library Exception along with this program;
++   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
++   <http://www.gnu.org/licenses/>.  */
++
++#include "libgomp.h"
++#include <string.h>
++#include <stdio.h>
++#include <stdlib.h>
++#ifdef HAVE_UNISTD_H
++#include <unistd.h>
++#endif
++#ifdef HAVE_INTTYPES_H
++# include <inttypes.h>  /* For PRIx64.  */
++#endif
++#ifdef HAVE_UNAME
++#include <sys/utsname.h>
++#endif
++
++/* The HAVE_GETPID and HAVE_GETHOSTNAME configure tests are passing for RTEMS,
++   but the extra information they give is of little value to the user.
++   Override the configure test results here.  */
++#undef HAVE_GETPID
++#undef HAVE_GETHOSTNAME
++
++/* Avoid the complex fwrite(): use write() on fd 1; STREAM is ignored.  */
++#undef fwrite
++#define fwrite(ptr, size, nmemb, stream) write (1, (ptr), (nmemb) * (size))
++
++#include "../../affinity-fmt.c"
+--- libgomp/config.h.in.jj	2018-04-25 09:40:31.870655561 +0200
++++ libgomp/config.h.in	2019-05-07 18:46:36.465110710 +0200
+@@ -1,5 +1,8 @@
+ /* config.h.in.  Generated from configure.ac by autoheader.  */
+ 
++/* Define to 1 if you have the `aligned_alloc' function. */
++#undef HAVE_ALIGNED_ALLOC
++
+ /* Define to 1 if the target assembler supports .symver directive. */
+ #undef HAVE_AS_SYMVER_DIRECTIVE
+ 
+@@ -33,9 +36,15 @@
+ /* Define to 1 if you have the `getgid' function. */
+ #undef HAVE_GETGID
+ 
++/* Define if gethostname is supported. */
++#undef HAVE_GETHOSTNAME
++
+ /* Define to 1 if you have the `getloadavg' function. */
+ #undef HAVE_GETLOADAVG
+ 
++/* Define if getpid is supported. */
++#undef HAVE_GETPID
++
+ /* Define to 1 if you have the `getuid' function. */
+ #undef HAVE_GETUID
+ 
+@@ -45,9 +54,15 @@
+ /* Define to 1 if you have the `dl' library (-ldl). */
+ #undef HAVE_LIBDL
+ 
++/* Define to 1 if you have the `memalign' function. */
++#undef HAVE_MEMALIGN
++
+ /* Define to 1 if you have the <memory.h> header file. */
+ #undef HAVE_MEMORY_H
+ 
++/* Define to 1 if you have the `posix_memalign' function. */
++#undef HAVE_POSIX_MEMALIGN
++
+ /* Define if pthread_{,attr_}{g,s}etaffinity_np is supported. */
+ #undef HAVE_PTHREAD_AFFINITY_NP
+ 
+@@ -103,9 +118,15 @@
+ /* Define to 1 if the target supports thread-local storage. */
+ #undef HAVE_TLS
+ 
++/* Define if uname is supported and struct utsname has nodename field. */
++#undef HAVE_UNAME
++
+ /* Define to 1 if you have the <unistd.h> header file. */
+ #undef HAVE_UNISTD_H
+ 
++/* Define to 1 if you have the `_aligned_malloc' function. */
++#undef HAVE__ALIGNED_MALLOC
++
+ /* Define to 1 if you have the `__secure_getenv' function. */
+ #undef HAVE___SECURE_GETENV
+ 
+@@ -125,8 +146,8 @@
+    */
+ #undef LT_OBJDIR
+ 
+-/* Define to offload targets, separated by commas. */
+-#undef OFFLOAD_TARGETS
++/* Define to offload plugins, separated by commas. */
++#undef OFFLOAD_PLUGINS
+ 
+ /* Name of package */
+ #undef PACKAGE
+--- libgomp/teams.c.jj	2019-05-07 18:46:36.548109384 +0200
++++ libgomp/teams.c	2019-05-07 18:46:36.548109384 +0200
+@@ -0,0 +1,74 @@
++/* Copyright (C) 2018-2019 Free Software Foundation, Inc.
++   Contributed by Jakub Jelinek <jakub@redhat.com>.
++
++   This file is part of the GNU Offloading and Multi Processing Library
++   (libgomp).
++
++   Libgomp is free software; you can redistribute it and/or modify it
++   under the terms of the GNU General Public License as published by
++   the Free Software Foundation; either version 3, or (at your option)
++   any later version.
++
++   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
++   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
++   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
++   more details.
++
++   Under Section 7 of GPL version 3, you are granted additional
++   permissions described in the GCC Runtime Library Exception, version
++   3.1, as published by the Free Software Foundation.
++
++   You should have received a copy of the GNU General Public License and
++   a copy of the GCC Runtime Library Exception along with this program;
++   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
++   <http://www.gnu.org/licenses/>.  */
++
++/* This file handles the host TEAMS construct.  */
++
++#include "libgomp.h"
++#include <limits.h>
++
++static unsigned gomp_num_teams = 1, gomp_team_num = 0;
++
++void
++GOMP_teams_reg (void (*fn) (void *), void *data, unsigned int num_teams,
++		unsigned int thread_limit, unsigned int flags)
++{
++  (void) flags;
++  (void) num_teams;
++  unsigned old_thread_limit_var = 0;
++  if (thread_limit)
++    {
++      struct gomp_task_icv *icv = gomp_icv (true);
++      old_thread_limit_var = icv->thread_limit_var;
++      icv->thread_limit_var
++	= thread_limit > INT_MAX ? UINT_MAX : thread_limit;
++    }
++  if (num_teams == 0)
++    num_teams = 3;
++  gomp_num_teams = num_teams;
++  for (gomp_team_num = 0; gomp_team_num < num_teams; gomp_team_num++)
++    fn (data);
++  gomp_num_teams = 1;
++  gomp_team_num = 0;
++  if (thread_limit)
++    {
++      struct gomp_task_icv *icv = gomp_icv (true);
++      icv->thread_limit_var = old_thread_limit_var;
++    }
++}
++
++int
++omp_get_num_teams (void)
++{
++  return gomp_num_teams;
++}
++
++int
++omp_get_team_num (void)
++{
++  return gomp_team_num;
++}
++
++ialias (omp_get_num_teams)
++ialias (omp_get_team_num)
+--- libgomp/libgomp.map.jj	2018-04-25 09:40:31.321655307 +0200
++++ libgomp/libgomp.map	2019-05-07 18:46:36.525109751 +0200
+@@ -164,6 +164,22 @@ OMP_4.5 {
+ 	omp_target_disassociate_ptr;
+ } OMP_4.0;
+ 
++OMP_5.0 {
++  global:
++	omp_capture_affinity;
++	omp_capture_affinity_;
++	omp_display_affinity;
++	omp_display_affinity_;
++	omp_get_affinity_format;
++	omp_get_affinity_format_;
++	omp_set_affinity_format;
++	omp_set_affinity_format_;
++	omp_pause_resource;
++	omp_pause_resource_;
++	omp_pause_resource_all;
++	omp_pause_resource_all_;
++} OMP_4.5;
++
+ GOMP_1.0 {
+   global:
+ 	GOMP_atomic_end;
+@@ -298,6 +314,34 @@ GOMP_4.5 {
+ 	GOMP_parallel_loop_nonmonotonic_guided;
+ } GOMP_4.0.1;
+ 
++GOMP_5.0 {
++  global:
++	GOMP_loop_doacross_start;
++	GOMP_loop_maybe_nonmonotonic_runtime_next;
++	GOMP_loop_maybe_nonmonotonic_runtime_start;
++	GOMP_loop_nonmonotonic_runtime_next;
++	GOMP_loop_nonmonotonic_runtime_start;
++	GOMP_loop_ordered_start;
++	GOMP_loop_start;
++	GOMP_loop_ull_doacross_start;
++	GOMP_loop_ull_maybe_nonmonotonic_runtime_next;
++	GOMP_loop_ull_maybe_nonmonotonic_runtime_start;
++	GOMP_loop_ull_nonmonotonic_runtime_next;
++	GOMP_loop_ull_nonmonotonic_runtime_start;
++	GOMP_loop_ull_ordered_start;
++	GOMP_loop_ull_start;
++	GOMP_parallel_loop_maybe_nonmonotonic_runtime;
++	GOMP_parallel_loop_nonmonotonic_runtime;
++	GOMP_parallel_reductions;
++	GOMP_sections2_start;
++	GOMP_taskgroup_reduction_register;
++	GOMP_taskgroup_reduction_unregister;
++	GOMP_task_reduction_remap;
++	GOMP_taskwait_depend;
++	GOMP_teams_reg;
++	GOMP_workshare_task_reduction_unregister;
++} GOMP_4.5;
++
+ OACC_2.0 {
+   global:
+ 	acc_get_num_devices;
+@@ -386,6 +430,52 @@ OACC_2.0.1 {
+ 	acc_pcreate;
+ } OACC_2.0;
+ 
++OACC_2.5 {
++  global:
++	acc_copyin_async;
++	acc_copyin_async_32_h_;
++	acc_copyin_async_64_h_;
++	acc_copyin_async_array_h_;
++	acc_copyout_async;
++	acc_copyout_async_32_h_;
++	acc_copyout_async_64_h_;
++	acc_copyout_async_array_h_;
++	acc_copyout_finalize;
++	acc_copyout_finalize_32_h_;
++	acc_copyout_finalize_64_h_;
++	acc_copyout_finalize_array_h_;
++	acc_copyout_finalize_async;
++	acc_copyout_finalize_async_32_h_;
++	acc_copyout_finalize_async_64_h_;
++	acc_copyout_finalize_async_array_h_;
++	acc_create_async;
++	acc_create_async_32_h_;
++	acc_create_async_64_h_;
++	acc_create_async_array_h_;
++	acc_delete_async;
++	acc_delete_async_32_h_;
++	acc_delete_async_64_h_;
++	acc_delete_async_array_h_;
++	acc_delete_finalize;
++	acc_delete_finalize_32_h_;
++	acc_delete_finalize_64_h_;
++	acc_delete_finalize_array_h_;
++	acc_delete_finalize_async;
++	acc_delete_finalize_async_32_h_;
++	acc_delete_finalize_async_64_h_;
++	acc_delete_finalize_async_array_h_;
++	acc_memcpy_from_device_async;
++	acc_memcpy_to_device_async;
++	acc_update_device_async;
++	acc_update_device_async_32_h_;
++	acc_update_device_async_64_h_;
++	acc_update_device_async_array_h_;
++	acc_update_self_async;
++	acc_update_self_async_32_h_;
++	acc_update_self_async_64_h_;
++	acc_update_self_async_array_h_;
++} OACC_2.0.1;
++
+ GOACC_2.0 {
+   global:
+ 	GOACC_data_end;
+@@ -420,3 +510,8 @@ GOMP_PLUGIN_1.1 {
+   global:
+ 	GOMP_PLUGIN_target_task_completion;
+ } GOMP_PLUGIN_1.0;
++
++GOMP_PLUGIN_1.2 {
++  global:
++	GOMP_PLUGIN_acc_default_dim;
++} GOMP_PLUGIN_1.1;
+--- libgomp/oacc-async.c.jj	2018-04-25 09:40:31.925655587 +0200
++++ libgomp/oacc-async.c	2019-05-07 18:46:36.528109704 +0200
+@@ -34,7 +34,7 @@
+ int
+ acc_async_test (int async)
+ {
+-  if (async < acc_async_sync)
++  if (!async_valid_p (async))
+     gomp_fatal ("invalid async argument: %d", async);
+ 
+   struct goacc_thread *thr = goacc_thread ();
+@@ -59,7 +59,7 @@ acc_async_test_all (void)
+ void
+ acc_wait (int async)
+ {
+-  if (async < acc_async_sync)
++  if (!async_valid_p (async))
+     gomp_fatal ("invalid async argument: %d", async);
+ 
+   struct goacc_thread *thr = goacc_thread ();
+@@ -117,7 +117,7 @@ acc_async_wait_all (void)
+ void
+ acc_wait_all_async (int async)
+ {
+-  if (async < acc_async_sync)
++  if (!async_valid_p (async))
+     gomp_fatal ("invalid async argument: %d", async);
+ 
+   struct goacc_thread *thr = goacc_thread ();
+--- libgomp/loop_ull.c.jj	2018-04-25 09:40:31.912655580 +0200
++++ libgomp/loop_ull.c	2019-05-07 18:46:36.527109719 +0200
+@@ -27,8 +27,12 @@
+ 
+ #include <limits.h>
+ #include <stdlib.h>
++#include <string.h>
+ #include "libgomp.h"
+ 
++ialias (GOMP_loop_ull_runtime_next)
++ialias_redirect (GOMP_taskgroup_reduction_register)
++
+ typedef unsigned long long gomp_ull;
+ 
+ /* Initialize the given work share construct from the given arguments.  */
+@@ -104,7 +108,7 @@ gomp_loop_ull_static_start (bool up, gom
+   struct gomp_thread *thr = gomp_thread ();
+ 
+   thr->ts.static_trip = 0;
+-  if (gomp_work_share_start (false))
++  if (gomp_work_share_start (0))
+     {
+       gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr,
+ 			  GFS_STATIC, chunk_size);
+@@ -122,7 +126,7 @@ gomp_loop_ull_dynamic_start (bool up, go
+   struct gomp_thread *thr = gomp_thread ();
+   bool ret;
+ 
+-  if (gomp_work_share_start (false))
++  if (gomp_work_share_start (0))
+     {
+       gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr,
+ 			  GFS_DYNAMIC, chunk_size);
+@@ -148,7 +152,7 @@ gomp_loop_ull_guided_start (bool up, gom
+   struct gomp_thread *thr = gomp_thread ();
+   bool ret;
+ 
+-  if (gomp_work_share_start (false))
++  if (gomp_work_share_start (0))
+     {
+       gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr,
+ 			  GFS_GUIDED, chunk_size);
+@@ -171,7 +175,7 @@ GOMP_loop_ull_runtime_start (bool up, go
+ 			     gomp_ull incr, gomp_ull *istart, gomp_ull *iend)
+ {
+   struct gomp_task_icv *icv = gomp_icv (false);
+-  switch (icv->run_sched_var)
++  switch (icv->run_sched_var & ~GFS_MONOTONIC)
+     {
+     case GFS_STATIC:
+       return gomp_loop_ull_static_start (up, start, end, incr,
+@@ -195,6 +199,99 @@ GOMP_loop_ull_runtime_start (bool up, go
+     }
+ }
+ 
++static long
++gomp_adjust_sched (long sched, gomp_ull *chunk_size)
++{
++  sched &= ~GFS_MONOTONIC;
++  switch (sched)
++    {
++    case GFS_STATIC:
++    case GFS_DYNAMIC:
++    case GFS_GUIDED:
++      return sched;
++    /* GFS_RUNTIME is used for runtime schedule without monotonic
++       or nonmonotonic modifiers on the clause.
++       GFS_RUNTIME|GFS_MONOTONIC for runtime schedule with monotonic
++       modifier.  */
++    case GFS_RUNTIME:
++    /* GFS_AUTO is used for runtime schedule with nonmonotonic
++       modifier.  */
++    case GFS_AUTO:
++      {
++	struct gomp_task_icv *icv = gomp_icv (false);
++	sched = icv->run_sched_var & ~GFS_MONOTONIC;
++	switch (sched)
++	  {
++	  case GFS_STATIC:
++	  case GFS_DYNAMIC:
++	  case GFS_GUIDED:
++	    *chunk_size = icv->run_sched_chunk_size;
++	    break;
++	  case GFS_AUTO:
++	    sched = GFS_STATIC;
++	    *chunk_size = 0;
++	    break;
++	  default:
++	    abort ();
++	  }
++	return sched;
++      }
++    default:
++      abort ();
++    }
++}
++
++bool
++GOMP_loop_ull_start (bool up, gomp_ull start, gomp_ull end,
++		     gomp_ull incr, long sched, gomp_ull chunk_size,
++		     gomp_ull *istart, gomp_ull *iend,
++		     uintptr_t *reductions, void **mem)
++{
++  struct gomp_thread *thr = gomp_thread ();
++
++  thr->ts.static_trip = 0;
++  if (reductions)
++    gomp_workshare_taskgroup_start ();
++  if (gomp_work_share_start (0))
++    {
++      sched = gomp_adjust_sched (sched, &chunk_size);
++      gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr,
++      			  sched, chunk_size);
++      if (reductions)
++	{
++	  GOMP_taskgroup_reduction_register (reductions);
++	  thr->task->taskgroup->workshare = true;
++	  thr->ts.work_share->task_reductions = reductions;
++	}
++      if (mem)
++	{
++	  uintptr_t size = (uintptr_t) *mem;
++	  if (size > (sizeof (struct gomp_work_share)
++		      - offsetof (struct gomp_work_share,
++				  inline_ordered_team_ids)))
++	    thr->ts.work_share->ordered_team_ids
++	      = gomp_malloc_cleared (size);
++	  else
++	    memset (thr->ts.work_share->ordered_team_ids, '\0', size);
++	  *mem = (void *) thr->ts.work_share->ordered_team_ids;
++	}
++      gomp_work_share_init_done ();
++    }
++  else
++    {
++      if (reductions)
++	{
++	  uintptr_t *first_reductions = thr->ts.work_share->task_reductions;
++	  gomp_workshare_task_reduction_register (reductions,
++						  first_reductions);
++	}
++      if (mem)
++	*mem = (void *) thr->ts.work_share->ordered_team_ids;
++    }
++
++  return ialias_call (GOMP_loop_ull_runtime_next) (istart, iend);
++}
++
+ /* The *_ordered_*_start routines are similar.  The only difference is that
+    this work-share construct is initialized to expect an ORDERED section.  */
+ 
+@@ -206,7 +303,7 @@ gomp_loop_ull_ordered_static_start (bool
+   struct gomp_thread *thr = gomp_thread ();
+ 
+   thr->ts.static_trip = 0;
+-  if (gomp_work_share_start (true))
++  if (gomp_work_share_start (1))
+     {
+       gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr,
+ 			  GFS_STATIC, chunk_size);
+@@ -225,7 +322,7 @@ gomp_loop_ull_ordered_dynamic_start (boo
+   struct gomp_thread *thr = gomp_thread ();
+   bool ret;
+ 
+-  if (gomp_work_share_start (true))
++  if (gomp_work_share_start (1))
+     {
+       gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr,
+ 			  GFS_DYNAMIC, chunk_size);
+@@ -251,7 +348,7 @@ gomp_loop_ull_ordered_guided_start (bool
+   struct gomp_thread *thr = gomp_thread ();
+   bool ret;
+ 
+-  if (gomp_work_share_start (true))
++  if (gomp_work_share_start (1))
+     {
+       gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr,
+ 			  GFS_GUIDED, chunk_size);
+@@ -275,7 +372,7 @@ GOMP_loop_ull_ordered_runtime_start (boo
+ 				     gomp_ull *iend)
+ {
+   struct gomp_task_icv *icv = gomp_icv (false);
+-  switch (icv->run_sched_var)
++  switch (icv->run_sched_var & ~GFS_MONOTONIC)
+     {
+     case GFS_STATIC:
+       return gomp_loop_ull_ordered_static_start (up, start, end, incr,
+@@ -299,6 +396,82 @@ GOMP_loop_ull_ordered_runtime_start (boo
+     }
+ }
+ 
++bool
++GOMP_loop_ull_ordered_start (bool up, gomp_ull start, gomp_ull end,
++			     gomp_ull incr, long sched, gomp_ull chunk_size,
++			     gomp_ull *istart, gomp_ull *iend,
++			     uintptr_t *reductions, void **mem)
++{
++  struct gomp_thread *thr = gomp_thread ();
++  size_t ordered = 1;
++  bool ret;
++
++  thr->ts.static_trip = 0;
++  if (reductions)
++    gomp_workshare_taskgroup_start ();
++  if (mem)
++    ordered += (uintptr_t) *mem;
++  if (gomp_work_share_start (ordered))
++    {
++      sched = gomp_adjust_sched (sched, &chunk_size);
++      gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr,
++			  sched, chunk_size);
++      if (reductions)
++	{
++	  GOMP_taskgroup_reduction_register (reductions);
++	  thr->task->taskgroup->workshare = true;
++	  thr->ts.work_share->task_reductions = reductions;
++	}
++      if (sched == GFS_STATIC)
++	gomp_ordered_static_init ();
++      else
++	gomp_mutex_lock (&thr->ts.work_share->lock);
++      gomp_work_share_init_done ();
++    }
++  else
++    {
++      if (reductions)
++	{
++	  uintptr_t *first_reductions = thr->ts.work_share->task_reductions;
++	  gomp_workshare_task_reduction_register (reductions,
++						  first_reductions);
++	}
++      sched = thr->ts.work_share->sched;
++      if (sched != GFS_STATIC)
++	gomp_mutex_lock (&thr->ts.work_share->lock);
++    }
++
++  if (mem)
++    {
++      uintptr_t p
++	= (uintptr_t) (thr->ts.work_share->ordered_team_ids
++		       + (thr->ts.team ? thr->ts.team->nthreads : 1));
++      p += __alignof__ (long long) - 1;
++      p &= ~(__alignof__ (long long) - 1);
++      *mem = (void *) p;
++    }
++
++  switch (sched)
++    {
++    case GFS_STATIC:
++    case GFS_AUTO:
++      return !gomp_iter_ull_static_next (istart, iend);
++    case GFS_DYNAMIC:
++      ret = gomp_iter_ull_dynamic_next_locked (istart, iend);
++      break;
++    case GFS_GUIDED:
++      ret = gomp_iter_ull_guided_next_locked (istart, iend);
++      break;
++    default:
++      abort ();
++    }
++
++  if (ret)
++    gomp_ordered_first ();
++  gomp_mutex_unlock (&thr->ts.work_share->lock);
++  return ret;
++}
++
+ /* The *_doacross_*_start routines are similar.  The only difference is that
+    this work-share construct is initialized to expect an ORDERED(N) - DOACROSS
+    section, and the worksharing loop iterates always from 0 to COUNTS[0] - 1
+@@ -313,11 +486,11 @@ gomp_loop_ull_doacross_static_start (uns
+   struct gomp_thread *thr = gomp_thread ();
+ 
+   thr->ts.static_trip = 0;
+-  if (gomp_work_share_start (false))
++  if (gomp_work_share_start (0))
+     {
+       gomp_loop_ull_init (thr->ts.work_share, true, 0, counts[0], 1,
+ 			  GFS_STATIC, chunk_size);
+-      gomp_doacross_ull_init (ncounts, counts, chunk_size);
++      gomp_doacross_ull_init (ncounts, counts, chunk_size, 0);
+       gomp_work_share_init_done ();
+     }
+ 
+@@ -332,11 +505,11 @@ gomp_loop_ull_doacross_dynamic_start (un
+   struct gomp_thread *thr = gomp_thread ();
+   bool ret;
+ 
+-  if (gomp_work_share_start (false))
++  if (gomp_work_share_start (0))
+     {
+       gomp_loop_ull_init (thr->ts.work_share, true, 0, counts[0], 1,
+ 			  GFS_DYNAMIC, chunk_size);
+-      gomp_doacross_ull_init (ncounts, counts, chunk_size);
++      gomp_doacross_ull_init (ncounts, counts, chunk_size, 0);
+       gomp_work_share_init_done ();
+     }
+ 
+@@ -359,11 +532,11 @@ gomp_loop_ull_doacross_guided_start (uns
+   struct gomp_thread *thr = gomp_thread ();
+   bool ret;
+ 
+-  if (gomp_work_share_start (false))
++  if (gomp_work_share_start (0))
+     {
+       gomp_loop_ull_init (thr->ts.work_share, true, 0, counts[0], 1,
+ 			  GFS_GUIDED, chunk_size);
+-      gomp_doacross_ull_init (ncounts, counts, chunk_size);
++      gomp_doacross_ull_init (ncounts, counts, chunk_size, 0);
+       gomp_work_share_init_done ();
+     }
+ 
+@@ -383,7 +556,7 @@ GOMP_loop_ull_doacross_runtime_start (un
+ 				      gomp_ull *istart, gomp_ull *iend)
+ {
+   struct gomp_task_icv *icv = gomp_icv (false);
+-  switch (icv->run_sched_var)
++  switch (icv->run_sched_var & ~GFS_MONOTONIC)
+     {
+     case GFS_STATIC:
+       return gomp_loop_ull_doacross_static_start (ncounts, counts,
+@@ -407,6 +580,51 @@ GOMP_loop_ull_doacross_runtime_start (un
+     }
+ }
+ 
++bool
++GOMP_loop_ull_doacross_start (unsigned ncounts, gomp_ull *counts,
++			      long sched, gomp_ull chunk_size,
++			      gomp_ull *istart, gomp_ull *iend,
++			      uintptr_t *reductions, void **mem)
++{
++  struct gomp_thread *thr = gomp_thread ();
++
++  thr->ts.static_trip = 0;
++  if (reductions)
++    gomp_workshare_taskgroup_start ();
++  if (gomp_work_share_start (0))
++    {
++      size_t extra = 0;
++      if (mem)
++	extra = (uintptr_t) *mem;
++      sched = gomp_adjust_sched (sched, &chunk_size);
++      gomp_loop_ull_init (thr->ts.work_share, true, 0, counts[0], 1,
++			  sched, chunk_size);
++      gomp_doacross_ull_init (ncounts, counts, chunk_size, extra);
++      if (reductions)
++	{
++	  GOMP_taskgroup_reduction_register (reductions);
++	  thr->task->taskgroup->workshare = true;
++	  thr->ts.work_share->task_reductions = reductions;
++	}
++      gomp_work_share_init_done ();
++    }
++  else
++    {
++      if (reductions)
++	{
++	  uintptr_t *first_reductions = thr->ts.work_share->task_reductions;
++	  gomp_workshare_task_reduction_register (reductions,
++						  first_reductions);
++	}
++      sched = thr->ts.work_share->sched;
++    }
++
++  if (mem)
++    *mem = thr->ts.work_share->doacross->extra;
++
++  return ialias_call (GOMP_loop_ull_runtime_next) (istart, iend);
++}
++
+ /* The *_next routines are called when the thread completes processing of
+    the iteration block currently assigned to it.  If the work-share
+    construct is bound directly to a parallel construct, then the iteration
+@@ -570,6 +788,10 @@ extern __typeof(gomp_loop_ull_dynamic_st
+ 	__attribute__((alias ("gomp_loop_ull_dynamic_start")));
+ extern __typeof(gomp_loop_ull_guided_start) GOMP_loop_ull_nonmonotonic_guided_start
+ 	__attribute__((alias ("gomp_loop_ull_guided_start")));
++extern __typeof(GOMP_loop_ull_runtime_start) GOMP_loop_ull_nonmonotonic_runtime_start
++	__attribute__((alias ("GOMP_loop_ull_runtime_start")));
++extern __typeof(GOMP_loop_ull_runtime_start) GOMP_loop_ull_maybe_nonmonotonic_runtime_start
++	__attribute__((alias ("GOMP_loop_ull_runtime_start")));
+ 
+ extern __typeof(gomp_loop_ull_ordered_static_start) GOMP_loop_ull_ordered_static_start
+ 	__attribute__((alias ("gomp_loop_ull_ordered_static_start")));
+@@ -595,6 +817,10 @@ extern __typeof(gomp_loop_ull_dynamic_ne
+ 	__attribute__((alias ("gomp_loop_ull_dynamic_next")));
+ extern __typeof(gomp_loop_ull_guided_next) GOMP_loop_ull_nonmonotonic_guided_next
+ 	__attribute__((alias ("gomp_loop_ull_guided_next")));
++extern __typeof(GOMP_loop_ull_runtime_next) GOMP_loop_ull_nonmonotonic_runtime_next
++	__attribute__((alias ("GOMP_loop_ull_runtime_next")));
++extern __typeof(GOMP_loop_ull_runtime_next) GOMP_loop_ull_maybe_nonmonotonic_runtime_next
++	__attribute__((alias ("GOMP_loop_ull_runtime_next")));
+ 
+ extern __typeof(gomp_loop_ull_ordered_static_next) GOMP_loop_ull_ordered_static_next
+ 	__attribute__((alias ("gomp_loop_ull_ordered_static_next")));
+@@ -650,6 +876,23 @@ GOMP_loop_ull_nonmonotonic_guided_start
+ }
+ 
+ bool
++GOMP_loop_ull_nonmonotonic_runtime_start (bool up, gomp_ull start,
++					  gomp_ull end, gomp_ull incr,
++					  gomp_ull *istart, gomp_ull *iend)
++{
++  return GOMP_loop_ull_runtime_start (up, start, end, incr, istart, iend);
++}
++
++bool
++GOMP_loop_ull_maybe_nonmonotonic_runtime_start (bool up, gomp_ull start,
++						gomp_ull end, gomp_ull incr,
++						gomp_ull *istart,
++						gomp_ull *iend)
++{
++  return GOMP_loop_ull_runtime_start (up, start, end, incr, istart, iend);
++}
++
++bool
+ GOMP_loop_ull_ordered_static_start (bool up, gomp_ull start, gomp_ull end,
+ 				    gomp_ull incr, gomp_ull chunk_size,
+ 				    gomp_ull *istart, gomp_ull *iend)
+@@ -734,6 +977,19 @@ GOMP_loop_ull_nonmonotonic_guided_next (
+ }
+ 
+ bool
++GOMP_loop_ull_nonmonotonic_runtime_next (gomp_ull *istart, gomp_ull *iend)
++{
++  return GOMP_loop_ull_runtime_next (istart, iend);
++}
++
++bool
++GOMP_loop_ull_maybe_nonmonotonic_runtime_next (gomp_ull *istart,
++					       gomp_ull *iend)
++{
++  return GOMP_loop_ull_runtime_next (istart, iend);
++}
++
++bool
+ GOMP_loop_ull_ordered_static_next (gomp_ull *istart, gomp_ull *iend)
+ {
+   return gomp_loop_ull_ordered_static_next (istart, iend);
+--- libgomp/oacc-int.h.jj	2018-04-25 09:40:31.320655306 +0200
++++ libgomp/oacc-int.h	2019-05-07 18:46:36.529109688 +0200
+@@ -99,6 +99,28 @@ void goacc_restore_bind (void);
+ void goacc_lazy_initialize (void);
+ void goacc_host_init (void);
+ 
++static inline bool
++async_valid_stream_id_p (int async)
++{
++  return async >= 0;
++}
++
++static inline bool
++async_valid_p (int async)
++{
++  return (async == acc_async_noval || async == acc_async_sync
++	  || async_valid_stream_id_p (async));
++}
++
++static inline bool
++async_synchronous_p (int async)
++{
++  if (!async_valid_p (async))
++    return true;
++
++  return async == acc_async_sync;
++}
++
+ #ifdef HAVE_ATTRIBUTE_VISIBILITY
+ # pragma GCC visibility pop
+ #endif
+--- libgomp/testsuite/Makefile.in.jj	2018-04-25 09:40:31.452655368 +0200
++++ libgomp/testsuite/Makefile.in	2019-05-07 18:51:35.754330084 +0200
+@@ -223,6 +223,7 @@ mkdir_p = @mkdir_p@
+ multi_basedir = @multi_basedir@
+ offload_additional_lib_paths = @offload_additional_lib_paths@
+ offload_additional_options = @offload_additional_options@
++offload_plugins = @offload_plugins@
+ offload_targets = @offload_targets@
+ oldincludedir = @oldincludedir@
+ pdfdir = @pdfdir@
+--- libgomp/task.c.jj	2018-04-25 09:40:31.925655587 +0200
++++ libgomp/task.c	2019-05-07 18:46:36.547109400 +0200
+@@ -166,21 +166,72 @@ gomp_task_handle_depend (struct gomp_tas
+ 			 void **depend)
+ {
+   size_t ndepend = (uintptr_t) depend[0];
+-  size_t nout = (uintptr_t) depend[1];
+   size_t i;
+   hash_entry_type ent;
+ 
++  if (ndepend)
++    {
++      /* depend[0] is total # */
++      size_t nout = (uintptr_t) depend[1]; /* # of out: and inout: */
++      /* ndepend - nout is # of in: */
++      for (i = 0; i < ndepend; i++)
++	{
++	  task->depend[i].addr = depend[2 + i];
++	  task->depend[i].is_in = i >= nout;
++	}
++    }
++  else
++    {
++      ndepend = (uintptr_t) depend[1]; /* total # */
++      size_t nout = (uintptr_t) depend[2]; /* # of out: and inout: */
++      size_t nmutexinoutset = (uintptr_t) depend[3]; /* # of mutexinoutset: */
++      /* For now we treat mutexinoutset like out, which is compliant, but
++	 inefficient.  */
++      size_t nin = (uintptr_t) depend[4]; /* # of in: */
++      /* ndepend - nout - nmutexinoutset - nin is # of depobjs */
++      size_t normal = nout + nmutexinoutset + nin;
++      size_t n = 0;
++      for (i = normal; i < ndepend; i++)
++	{
++	  void **d = (void **) (uintptr_t) depend[5 + i];
++	  switch ((uintptr_t) d[1])
++	    {
++	    case GOMP_DEPEND_OUT:
++	    case GOMP_DEPEND_INOUT:
++	    case GOMP_DEPEND_MUTEXINOUTSET:
++	      break;
++	    case GOMP_DEPEND_IN:
++	      continue;
++	    default:
++	      gomp_fatal ("unknown omp_depend_t dependence type %d",
++			  (int) (uintptr_t) d[1]);
++	    }
++	  task->depend[n].addr = d[0];
++	  task->depend[n++].is_in = 0;
++	}
++      for (i = 0; i < normal; i++)
++	{
++	  task->depend[n].addr = depend[5 + i];
++	  task->depend[n++].is_in = i >= nout + nmutexinoutset;
++	}
++      for (i = normal; i < ndepend; i++)
++	{
++	  void **d = (void **) (uintptr_t) depend[5 + i];
++	  if ((uintptr_t) d[1] != GOMP_DEPEND_IN)
++	    continue;
++	  task->depend[n].addr = d[0];
++	  task->depend[n++].is_in = 1;
++	}
++    }
+   task->depend_count = ndepend;
+   task->num_dependees = 0;
+   if (parent->depend_hash == NULL)
+     parent->depend_hash = htab_create (2 * ndepend > 12 ? 2 * ndepend : 12);
+   for (i = 0; i < ndepend; i++)
+     {
+-      task->depend[i].addr = depend[2 + i];
+       task->depend[i].next = NULL;
+       task->depend[i].prev = NULL;
+       task->depend[i].task = task;
+-      task->depend[i].is_in = i >= nout;
+       task->depend[i].redundant = false;
+       task->depend[i].redundant_out = false;
+ 
+@@ -205,7 +256,7 @@ gomp_task_handle_depend (struct gomp_tas
+ 	      last = ent;
+ 
+ 	      /* depend(in:...) doesn't depend on earlier depend(in:...).  */
+-	      if (i >= nout && ent->is_in)
++	      if (task->depend[i].is_in && ent->is_in)
+ 		continue;
+ 
+ 	      if (!ent->is_in)
+@@ -280,9 +331,18 @@ gomp_task_handle_depend (struct gomp_tas
+    then the task may be executed by any member of the team.
+ 
+    DEPEND is an array containing:
++     if depend[0] is non-zero, then:
+ 	depend[0]: number of depend elements.
+-	depend[1]: number of depend elements of type "out".
+-	depend[2..N+1]: address of [1..N]th depend element.  */
++	depend[1]: number of depend elements of type "out/inout".
++	depend[2..N+1]: address of [1..N]th depend element.
++     otherwise, when depend[0] is zero, then:
++	depend[1]: number of depend elements.
++	depend[2]: number of depend elements of type "out/inout".
++	depend[3]: number of depend elements of type "mutexinoutset".
++	depend[4]: number of depend elements of type "in".
++	depend[5..4+depend[2]+depend[3]+depend[4]]: address of depend elements
++	depend[5+depend[2]+depend[3]+depend[4]..4+depend[1]]: address of
++		   omp_depend_t objects.  */
+ 
+ void
+ GOMP_task (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *),
+@@ -303,10 +363,20 @@ GOMP_task (void (*fn) (void *), void *da
+ #endif
+ 
+   /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
+-  if (team
+-      && (gomp_team_barrier_cancelled (&team->barrier)
+-	  || (thr->task->taskgroup && thr->task->taskgroup->cancelled)))
+-    return;
++  if (__builtin_expect (gomp_cancel_var, 0) && team)
++    {
++      if (gomp_team_barrier_cancelled (&team->barrier))
++	return;
++      if (thr->task->taskgroup)
++	{
++	  if (thr->task->taskgroup->cancelled)
++	    return;
++	  if (thr->task->taskgroup->workshare
++	      && thr->task->taskgroup->prev
++	      && thr->task->taskgroup->prev->cancelled)
++	    return;
++	}
++    }
+ 
+   if ((flags & GOMP_TASK_FLAG_PRIORITY) == 0)
+     priority = 0;
+@@ -377,7 +447,7 @@ GOMP_task (void (*fn) (void *), void *da
+       size_t depend_size = 0;
+ 
+       if (flags & GOMP_TASK_FLAG_DEPEND)
+-	depend_size = ((uintptr_t) depend[0]
++	depend_size = ((uintptr_t) (depend[0] ? depend[0] : depend[1])
+ 		       * sizeof (struct gomp_task_depend_entry));
+       task = gomp_malloc (sizeof (*task) + depend_size
+ 			  + arg_size + arg_align - 1);
+@@ -404,14 +474,26 @@ GOMP_task (void (*fn) (void *), void *da
+       gomp_mutex_lock (&team->task_lock);
+       /* If parallel or taskgroup has been cancelled, don't start new
+ 	 tasks.  */
+-      if (__builtin_expect ((gomp_team_barrier_cancelled (&team->barrier)
+-			     || (taskgroup && taskgroup->cancelled))
+-			    && !task->copy_ctors_done, 0))
++      if (__builtin_expect (gomp_cancel_var, 0)
++	  && !task->copy_ctors_done)
+ 	{
+-	  gomp_mutex_unlock (&team->task_lock);
+-	  gomp_finish_task (task);
+-	  free (task);
+-	  return;
++	  if (gomp_team_barrier_cancelled (&team->barrier))
++	    {
++	    do_cancel:
++	      gomp_mutex_unlock (&team->task_lock);
++	      gomp_finish_task (task);
++	      free (task);
++	      return;
++	    }
++	  if (taskgroup)
++	    {
++	      if (taskgroup->cancelled)
++		goto do_cancel;
++	      if (taskgroup->workshare
++		  && taskgroup->prev
++		  && taskgroup->prev->cancelled)
++		goto do_cancel;
++	    }
+ 	}
+       if (taskgroup)
+ 	taskgroup->num_children++;
+@@ -463,6 +545,7 @@ GOMP_task (void (*fn) (void *), void *da
+ 
+ ialias (GOMP_taskgroup_start)
+ ialias (GOMP_taskgroup_end)
++ialias (GOMP_taskgroup_reduction_register)
+ 
+ #define TYPE long
+ #define UTYPE unsigned long
+@@ -601,10 +684,20 @@ gomp_create_target_task (struct gomp_dev
+   struct gomp_team *team = thr->ts.team;
+ 
+   /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
+-  if (team
+-      && (gomp_team_barrier_cancelled (&team->barrier)
+-	  || (thr->task->taskgroup && thr->task->taskgroup->cancelled)))
+-    return true;
++  if (__builtin_expect (gomp_cancel_var, 0) && team)
++    {
++      if (gomp_team_barrier_cancelled (&team->barrier))
++	return true;
++      if (thr->task->taskgroup)
++	{
++	  if (thr->task->taskgroup->cancelled)
++	    return true;
++	  if (thr->task->taskgroup->workshare
++	      && thr->task->taskgroup->prev
++	      && thr->task->taskgroup->prev->cancelled)
++	    return true;
++	}
++    }
+ 
+   struct gomp_target_task *ttask;
+   struct gomp_task *task;
+@@ -617,7 +710,7 @@ gomp_create_target_task (struct gomp_dev
+ 
+   if (depend != NULL)
+     {
+-      depend_cnt = (uintptr_t) depend[0];
++      depend_cnt = (uintptr_t) (depend[0] ? depend[0] : depend[1]);
+       depend_size = depend_cnt * sizeof (struct gomp_task_depend_entry);
+     }
+   if (fn)
+@@ -687,13 +780,25 @@ gomp_create_target_task (struct gomp_dev
+   task->final_task = 0;
+   gomp_mutex_lock (&team->task_lock);
+   /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
+-  if (__builtin_expect (gomp_team_barrier_cancelled (&team->barrier)
+-			|| (taskgroup && taskgroup->cancelled), 0))
++  if (__builtin_expect (gomp_cancel_var, 0))
+     {
+-      gomp_mutex_unlock (&team->task_lock);
+-      gomp_finish_task (task);
+-      free (task);
+-      return true;
++      if (gomp_team_barrier_cancelled (&team->barrier))
++	{
++	do_cancel:
++	  gomp_mutex_unlock (&team->task_lock);
++	  gomp_finish_task (task);
++	  free (task);
++	  return true;
++	}
++      if (taskgroup)
++	{
++	  if (taskgroup->cancelled)
++	    goto do_cancel;
++	  if (taskgroup->workshare
++	      && taskgroup->prev
++	      && taskgroup->prev->cancelled)
++	    goto do_cancel;
++	}
+     }
+   if (depend_size)
+     {
+@@ -986,10 +1091,21 @@ gomp_task_run_pre (struct gomp_task *chi
+ 
+   if (--team->task_queued_count == 0)
+     gomp_team_barrier_clear_task_pending (&team->barrier);
+-  if ((gomp_team_barrier_cancelled (&team->barrier)
+-       || (taskgroup && taskgroup->cancelled))
++  if (__builtin_expect (gomp_cancel_var, 0)
+       && !child_task->copy_ctors_done)
+-    return true;
++    {
++      if (gomp_team_barrier_cancelled (&team->barrier))
++	return true;
++      if (taskgroup)
++	{
++	  if (taskgroup->cancelled)
++	    return true;
++	  if (taskgroup->workshare
++	      && taskgroup->prev
++	      && taskgroup->prev->cancelled)
++	    return true;
++	}
++    }
+   return false;
+ }
+ 
+@@ -1456,6 +1572,35 @@ GOMP_taskwait (void)
+     }
+ }
+ 
++/* Called when encountering a taskwait directive with depend clause(s).
++   Wait as if it were a mergeable included task construct with empty body.  */
++
++void
++GOMP_taskwait_depend (void **depend)
++{
++  struct gomp_thread *thr = gomp_thread ();
++  struct gomp_team *team = thr->ts.team;
++
++  /* If parallel or taskgroup has been cancelled, return early.  */
++  if (__builtin_expect (gomp_cancel_var, 0) && team)
++    {
++      if (gomp_team_barrier_cancelled (&team->barrier))
++	return;
++      if (thr->task->taskgroup)
++	{
++	  if (thr->task->taskgroup->cancelled)
++	    return;
++	  if (thr->task->taskgroup->workshare
++	      && thr->task->taskgroup->prev
++	      && thr->task->taskgroup->prev->cancelled)
++	    return;
++	}
++    }
++
++  if (thr->task && thr->task->depend_hash)
++    gomp_task_maybe_wait_for_dependencies (depend);
++}
++
+ /* An undeferred task is about to run.  Wait for all tasks that this
+    undeferred task depends on.
+ 
+@@ -1464,7 +1609,7 @@ GOMP_taskwait (void)
+    the scheduling queues.  Then we iterate through these imminently
+    ready tasks (and possibly other high priority tasks), and run them.
+    If we run out of ready dependencies to execute, we either wait for
+-   the reamining dependencies to finish, or wait for them to get
++   the remaining dependencies to finish, or wait for them to get
+    scheduled so we can run them.
+ 
+    DEPEND is as in GOMP_task.  */
+@@ -1477,21 +1622,50 @@ gomp_task_maybe_wait_for_dependencies (v
+   struct gomp_team *team = thr->ts.team;
+   struct gomp_task_depend_entry elem, *ent = NULL;
+   struct gomp_taskwait taskwait;
+-  size_t ndepend = (uintptr_t) depend[0];
++  size_t orig_ndepend = (uintptr_t) depend[0];
+   size_t nout = (uintptr_t) depend[1];
++  size_t ndepend = orig_ndepend;
++  size_t normal = ndepend;
++  size_t n = 2;
+   size_t i;
+   size_t num_awaited = 0;
+   struct gomp_task *child_task = NULL;
+   struct gomp_task *to_free = NULL;
+   int do_wake = 0;
+ 
++  if (ndepend == 0)
++    {
++      ndepend = nout;
++      nout = (uintptr_t) depend[2] + (uintptr_t) depend[3];
++      normal = nout + (uintptr_t) depend[4];
++      n = 5;
++    }
+   gomp_mutex_lock (&team->task_lock);
+   for (i = 0; i < ndepend; i++)
+     {
+-      elem.addr = depend[i + 2];
++      elem.addr = depend[i + n];
++      elem.is_in = i >= nout;
++      if (__builtin_expect (i >= normal, 0))
++	{
++	  void **d = (void **) elem.addr;
++	  switch ((uintptr_t) d[1])
++	    {
++	    case GOMP_DEPEND_IN:
++	      break;
++	    case GOMP_DEPEND_OUT:
++	    case GOMP_DEPEND_INOUT:
++	    case GOMP_DEPEND_MUTEXINOUTSET:
++	      elem.is_in = 0;
++	      break;
++	    default:
++	      gomp_fatal ("unknown omp_depend_t dependence type %d",
++			  (int) (uintptr_t) d[1]);
++	    }
++	  elem.addr = d[0];
++	}
+       ent = htab_find (task->depend_hash, &elem);
+       for (; ent; ent = ent->next)
+-	if (i >= nout && ent->is_in)
++	if (elem.is_in && ent->is_in)
+ 	  continue;
+ 	else
+ 	  {
+@@ -1654,13 +1828,28 @@ GOMP_taskyield (void)
+   /* Nothing at the moment.  */
+ }
+ 
++static inline struct gomp_taskgroup *
++gomp_taskgroup_init (struct gomp_taskgroup *prev)
++{
++  struct gomp_taskgroup *taskgroup
++    = gomp_malloc (sizeof (struct gomp_taskgroup));
++  taskgroup->prev = prev;
++  priority_queue_init (&taskgroup->taskgroup_queue);
++  taskgroup->reductions = prev ? prev->reductions : NULL;
++  taskgroup->in_taskgroup_wait = false;
++  taskgroup->cancelled = false;
++  taskgroup->workshare = false;
++  taskgroup->num_children = 0;
++  gomp_sem_init (&taskgroup->taskgroup_sem, 0);
++  return taskgroup;
++}
++
+ void
+ GOMP_taskgroup_start (void)
+ {
+   struct gomp_thread *thr = gomp_thread ();
+   struct gomp_team *team = thr->ts.team;
+   struct gomp_task *task = thr->task;
+-  struct gomp_taskgroup *taskgroup;
+ 
+   /* If team is NULL, all tasks are executed as
+      GOMP_TASK_UNDEFERRED tasks and thus all children tasks of
+@@ -1668,14 +1857,7 @@ GOMP_taskgroup_start (void)
+      by the time GOMP_taskgroup_end is called.  */
+   if (team == NULL)
+     return;
+-  taskgroup = gomp_malloc (sizeof (struct gomp_taskgroup));
+-  taskgroup->prev = task->taskgroup;
+-  priority_queue_init (&taskgroup->taskgroup_queue);
+-  taskgroup->in_taskgroup_wait = false;
+-  taskgroup->cancelled = false;
+-  taskgroup->num_children = 0;
+-  gomp_sem_init (&taskgroup->taskgroup_sem, 0);
+-  task->taskgroup = taskgroup;
++  task->taskgroup = gomp_taskgroup_init (task->taskgroup);
+ }
+ 
+ void
+@@ -1840,6 +2022,302 @@ GOMP_taskgroup_end (void)
+   free (taskgroup);
+ }
+ 
++static inline __attribute__((always_inline)) void
++gomp_reduction_register (uintptr_t *data, uintptr_t *old, uintptr_t *orig,
++			 unsigned nthreads)
++{
++  size_t total_cnt = 0;
++  uintptr_t *d = data;
++  struct htab *old_htab = NULL, *new_htab;
++  do
++    {
++      if (__builtin_expect (orig != NULL, 0))
++	{
++	  /* For worksharing task reductions, memory has been allocated
++	     already by some other thread that encountered the construct
++	     earlier.  */
++	  d[2] = orig[2];
++	  d[6] = orig[6];
++	  orig = (uintptr_t *) orig[4];
++	}
++      else
++	{
++	  size_t sz = d[1] * nthreads;
++	  /* Should use omp_alloc if d[3] is not -1.  */
++	  void *ptr = gomp_aligned_alloc (d[2], sz);
++	  memset (ptr, '\0', sz);
++	  d[2] = (uintptr_t) ptr;
++	  d[6] = d[2] + sz;
++	}
++      d[5] = 0;
++      total_cnt += d[0];
++      if (d[4] == 0)
++	{
++	  d[4] = (uintptr_t) old;
++	  break;
++	}
++      else
++	d = (uintptr_t *) d[4];
++    }
++  while (1);
++  if (old && old[5])
++    {
++      old_htab = (struct htab *) old[5];
++      total_cnt += htab_elements (old_htab);
++    }
++  new_htab = htab_create (total_cnt);
++  if (old_htab)
++    {
++      /* Copy old hash table, like in htab_expand.  */
++      hash_entry_type *p, *olimit;
++      new_htab->n_elements = htab_elements (old_htab);
++      olimit = old_htab->entries + old_htab->size;
++      p = old_htab->entries;
++      do
++	{
++	  hash_entry_type x = *p;
++	  if (x != HTAB_EMPTY_ENTRY && x != HTAB_DELETED_ENTRY)
++	    *find_empty_slot_for_expand (new_htab, htab_hash (x)) = x;
++	  p++;
++	}
++      while (p < olimit);
++    }
++  d = data;
++  do
++    {
++      size_t j;
++      for (j = 0; j < d[0]; ++j)
++	{
++	  uintptr_t *p = d + 7 + j * 3;
++	  p[2] = (uintptr_t) d;
++	  /* Ugly hack, hash_entry_type is defined for the task dependencies,
++	     which hash on the first element which is a pointer.  We need
++	     to hash also on the first sizeof (uintptr_t) bytes which contain
++	     a pointer.  Hide the cast from the compiler.  */
++	  hash_entry_type n;
++	  __asm ("" : "=g" (n) : "0" (p));
++	  *htab_find_slot (&new_htab, n, INSERT) = n;
++	}
++      if (d[4] == (uintptr_t) old)
++	break;
++      else
++	d = (uintptr_t *) d[4];
++    }
++  while (1);
++  d[5] = (uintptr_t) new_htab;
++}
++
++static void
++gomp_create_artificial_team (void)
++{
++  struct gomp_thread *thr = gomp_thread ();
++  struct gomp_task_icv *icv;
++  struct gomp_team *team = gomp_new_team (1);
++  struct gomp_task *task = thr->task;
++  icv = task ? &task->icv : &gomp_global_icv;
++  team->prev_ts = thr->ts;
++  thr->ts.team = team;
++  thr->ts.team_id = 0;
++  thr->ts.work_share = &team->work_shares[0];
++  thr->ts.last_work_share = NULL;
++#ifdef HAVE_SYNC_BUILTINS
++  thr->ts.single_count = 0;
++#endif
++  thr->ts.static_trip = 0;
++  thr->task = &team->implicit_task[0];
++  gomp_init_task (thr->task, NULL, icv);
++  if (task)
++    {
++      thr->task = task;
++      gomp_end_task ();
++      free (task);
++      thr->task = &team->implicit_task[0];
++    }
++#ifdef LIBGOMP_USE_PTHREADS
++  else
++    pthread_setspecific (gomp_thread_destructor, thr);
++#endif
++}
++
++/* The format of data is:
++   data[0]	cnt
++   data[1]	size
++   data[2]	alignment (on output array pointer)
++   data[3]	allocator (-1 if malloc allocator)
++   data[4]	next pointer
++   data[5]	used internally (htab pointer)
++   data[6]	used internally (end of array)
++   cnt times
++   ent[0]	address
++   ent[1]	offset
++   ent[2]	used internally (pointer to data[0])
++   The entries are sorted by increasing offset, so that a binary
++   search can be performed.  Normally, data[8] is 0, exception is
++   for worksharing construct task reductions in cancellable parallel,
++   where at offset 0 there should be space for a pointer and an integer
++   which are used internally.  */
++
++void
++GOMP_taskgroup_reduction_register (uintptr_t *data)
++{
++  struct gomp_thread *thr = gomp_thread ();
++  struct gomp_team *team = thr->ts.team;
++  struct gomp_task *task;
++  unsigned nthreads;
++  if (__builtin_expect (team == NULL, 0))
++    {
++      /* The task reduction code needs a team and task, so for
++	 orphaned taskgroups just create the implicit team.  */
++      gomp_create_artificial_team ();
++      ialias_call (GOMP_taskgroup_start) ();
++      team = thr->ts.team;
++    }
++  nthreads = team->nthreads;
++  task = thr->task;
++  gomp_reduction_register (data, task->taskgroup->reductions, NULL, nthreads);
++  task->taskgroup->reductions = data;
++}
++
++void
++GOMP_taskgroup_reduction_unregister (uintptr_t *data)
++{
++  uintptr_t *d = data;
++  htab_free ((struct htab *) data[5]);
++  do
++    {
++      gomp_aligned_free ((void *) d[2]);
++      d = (uintptr_t *) d[4];
++    }
++  while (d && !d[5]);
++}
++ialias (GOMP_taskgroup_reduction_unregister)
++
++/* For i = 0 to cnt-1, remap ptrs[i] which is either address of the
++   original list item or address of previously remapped original list
++   item to address of the private copy, store that to ptrs[i].
++   For i < cntorig, additionally set ptrs[cnt+i] to the address of
++   the original list item.  */
++
++void
++GOMP_task_reduction_remap (size_t cnt, size_t cntorig, void **ptrs)
++{
++  struct gomp_thread *thr = gomp_thread ();
++  struct gomp_task *task = thr->task;
++  unsigned id = thr->ts.team_id;
++  uintptr_t *data = task->taskgroup->reductions;
++  uintptr_t *d;
++  struct htab *reduction_htab = (struct htab *) data[5];
++  size_t i;
++  for (i = 0; i < cnt; ++i)
++    {
++      hash_entry_type ent, n;
++      __asm ("" : "=g" (ent) : "0" (ptrs + i));
++      n = htab_find (reduction_htab, ent);
++      if (n)
++	{
++	  uintptr_t *p;
++	  __asm ("" : "=g" (p) : "0" (n));
++	  /* At this point, p[0] should be equal to (uintptr_t) ptrs[i],
++	     p[1] is the offset within the allocated chunk for each
++	     thread, p[2] is the array registered with
++	     GOMP_taskgroup_reduction_register, d[2] is the base of the
++	     allocated memory and d[1] is the size of the allocated chunk
++	     for one thread.  */
++	  d = (uintptr_t *) p[2];
++	  ptrs[i] = (void *) (d[2] + id * d[1] + p[1]);
++	  if (__builtin_expect (i < cntorig, 0))
++	    ptrs[cnt + i] = (void *) p[0];
++	  continue;
++	}
++      d = data;
++      while (d != NULL)
++	{
++	  if ((uintptr_t) ptrs[i] >= d[2] && (uintptr_t) ptrs[i] < d[6])
++	    break;
++	  d = (uintptr_t *) d[4];
++	}
++      if (d == NULL)
++	gomp_fatal ("couldn't find matching task_reduction or reduction with "
++		    "task modifier for %p", ptrs[i]);
++      uintptr_t off = ((uintptr_t) ptrs[i] - d[2]) % d[1];
++      ptrs[i] = (void *) (d[2] + id * d[1] + off);
++      if (__builtin_expect (i < cntorig, 0))
++	{
++	  size_t lo = 0, hi = d[0];	/* Half-open [lo, hi): hi can't wrap.  */
++	  while (lo < hi)
++	    {
++	      size_t m = (lo + hi) / 2;
++	      if (d[7 + 3 * m + 1] < off)
++		lo = m + 1;
++	      else if (d[7 + 3 * m + 1] == off)
++		{
++		  ptrs[cnt + i] = (void *) d[7 + 3 * m];
++		  break;
++		}
++	      else
++		hi = m;
++	    }
++	  if (lo >= hi)
++	    gomp_fatal ("couldn't find matching task_reduction or reduction "
++			"with task modifier for %p", ptrs[i]);
++	}
++    }
++}
++
++struct gomp_taskgroup *
++gomp_parallel_reduction_register (uintptr_t *data, unsigned nthreads)
++{
++  struct gomp_taskgroup *taskgroup = gomp_taskgroup_init (NULL);
++  gomp_reduction_register (data, NULL, NULL, nthreads);
++  taskgroup->reductions = data;
++  return taskgroup;
++}
++
++void
++gomp_workshare_task_reduction_register (uintptr_t *data, uintptr_t *orig)
++{
++  struct gomp_thread *thr = gomp_thread ();
++  struct gomp_team *team = thr->ts.team;
++  struct gomp_task *task = thr->task;
++  unsigned nthreads = team->nthreads;
++  gomp_reduction_register (data, task->taskgroup->reductions, orig, nthreads);
++  task->taskgroup->reductions = data;
++}
++
++void
++gomp_workshare_taskgroup_start (void)
++{
++  struct gomp_thread *thr = gomp_thread ();
++  struct gomp_team *team = thr->ts.team;
++  struct gomp_task *task;
++
++  if (team == NULL)
++    {
++      gomp_create_artificial_team ();
++      team = thr->ts.team;
++    }
++  task = thr->task;
++  task->taskgroup = gomp_taskgroup_init (task->taskgroup);
++  task->taskgroup->workshare = true;
++}
++
++void
++GOMP_workshare_task_reduction_unregister (bool cancelled)
++{
++  struct gomp_thread *thr = gomp_thread ();
++  struct gomp_task *task = thr->task;
++  struct gomp_team *team = thr->ts.team;
++  uintptr_t *data = task->taskgroup->reductions;
++  ialias_call (GOMP_taskgroup_end) ();
++  if (thr->ts.team_id == 0)
++    ialias_call (GOMP_taskgroup_reduction_unregister) (data);
++  else
++    htab_free ((struct htab *) data[5]);
++
++  if (!cancelled)
++    gomp_team_barrier_wait (&team->barrier);
++}
++
+ int
+ omp_in_final (void)
+ {
+--- libgomp/team.c.jj	2018-04-25 09:40:31.322655307 +0200
++++ libgomp/team.c	2019-05-07 18:46:36.548109384 +0200
+@@ -32,7 +32,6 @@
+ #include <string.h>
+ 
+ #ifdef LIBGOMP_USE_PTHREADS
+-/* This attribute contains PTHREAD_CREATE_DETACHED.  */
+ pthread_attr_t gomp_thread_attr;
+ 
+ /* This key is for the thread destructor.  */
+@@ -58,6 +57,7 @@ struct gomp_thread_start_data
+   struct gomp_thread_pool *thread_pool;
+   unsigned int place;
+   bool nested;
++  pthread_t handle;
+ };
+ 
+ 
+@@ -89,6 +89,9 @@ gomp_thread_start (void *xdata)
+   thr->ts = data->ts;
+   thr->task = data->task;
+   thr->place = data->place;
++#ifdef GOMP_NEEDS_THREAD_HANDLE
++  thr->handle = data->handle;
++#endif
+ 
+   thr->ts.team->ordered_release[thr->ts.team_id] = &thr->release;
+ 
+@@ -131,6 +134,7 @@ gomp_thread_start (void *xdata)
+     }
+ 
+   gomp_sem_destroy (&thr->release);
++  pthread_detach (pthread_self ());
+   thr->thread_pool = NULL;
+   thr->task = NULL;
+   return NULL;
+@@ -183,7 +187,7 @@ gomp_new_team (unsigned nthreads)
+   team->single_count = 0;
+ #endif
+   team->work_shares_to_free = &team->work_shares[0];
+-  gomp_init_work_share (&team->work_shares[0], false, nthreads);
++  gomp_init_work_share (&team->work_shares[0], 0, nthreads);
+   team->work_shares[0].next_alloc = NULL;
+   team->work_share_list_free = NULL;
+   team->work_share_list_alloc = &team->work_shares[1];
+@@ -231,6 +235,7 @@ gomp_free_pool_helper (void *thread_pool
+   thr->thread_pool = NULL;
+   thr->task = NULL;
+ #ifdef LIBGOMP_USE_PTHREADS
++  pthread_detach (pthread_self ());
+   pthread_exit (NULL);
+ #elif defined(__nvptx__)
+   asm ("exit;");
+@@ -297,7 +302,8 @@ gomp_free_thread (void *arg __attribute_
+ #ifdef LIBGOMP_USE_PTHREADS
+ void
+ gomp_team_start (void (*fn) (void *), void *data, unsigned nthreads,
+-		 unsigned flags, struct gomp_team *team)
++		 unsigned flags, struct gomp_team *team,
++		 struct gomp_taskgroup *taskgroup)
+ {
+   struct gomp_thread_start_data *start_data;
+   struct gomp_thread *thr, *nthr;
+@@ -312,6 +318,7 @@ gomp_team_start (void (*fn) (void *), vo
+   unsigned int s = 0, rest = 0, p = 0, k = 0;
+   unsigned int affinity_count = 0;
+   struct gomp_thread **affinity_thr = NULL;
++  bool force_display = false;
+ 
+   thr = gomp_thread ();
+   nested = thr->ts.level;
+@@ -319,7 +326,12 @@ gomp_team_start (void (*fn) (void *), vo
+   task = thr->task;
+   icv = task ? &task->icv : &gomp_global_icv;
+   if (__builtin_expect (gomp_places_list != NULL, 0) && thr->place == 0)
+-    gomp_init_affinity ();
++    {
++      gomp_init_affinity ();
++      if (__builtin_expect (gomp_display_affinity_var, 0) && nthreads == 1)
++	gomp_display_affinity_thread (gomp_thread_self (), &thr->ts,
++				      thr->place);
++    }
+ 
+   /* Always save the previous state, even if this isn't a nested team.
+      In particular, we should save any work share state from an outer
+@@ -338,6 +350,9 @@ gomp_team_start (void (*fn) (void *), vo
+ #endif
+   thr->ts.static_trip = 0;
+   thr->task = &team->implicit_task[0];
++#ifdef GOMP_NEEDS_THREAD_HANDLE
++  thr->handle = pthread_self ();
++#endif
+   nthreads_var = icv->nthreads_var;
+   if (__builtin_expect (gomp_nthreads_var_list != NULL, 0)
+       && thr->ts.level < gomp_nthreads_var_list_len)
+@@ -350,6 +365,7 @@ gomp_team_start (void (*fn) (void *), vo
+       && thr->ts.level < gomp_bind_var_list_len)
+     bind_var = gomp_bind_var_list[thr->ts.level];
+   gomp_init_task (thr->task, task, icv);
++  thr->task->taskgroup = taskgroup;
+   team->implicit_task[0].icv.nthreads_var = nthreads_var;
+   team->implicit_task[0].icv.bind_var = bind_var;
+ 
+@@ -465,7 +481,9 @@ gomp_team_start (void (*fn) (void *), vo
+ 	  pool->threads
+ 	    = gomp_realloc (pool->threads,
+ 			    pool->threads_size
+-			    * sizeof (struct gomp_thread_data *));
++			    * sizeof (struct gomp_thread *));
++	  /* Add current (master) thread to threads[].  */
++	  pool->threads[0] = thr;
+ 	}
+ 
+       /* Release existing idle threads.  */
+@@ -540,6 +558,7 @@ gomp_team_start (void (*fn) (void *), vo
+ 						+ place_partition_len))
+ 		{
+ 		  unsigned int l;
++		  force_display = true;
+ 		  if (affinity_thr == NULL)
+ 		    {
+ 		      unsigned int j;
+@@ -623,6 +642,7 @@ gomp_team_start (void (*fn) (void *), vo
+ 	  gomp_init_task (nthr->task, task, icv);
+ 	  team->implicit_task[i].icv.nthreads_var = nthreads_var;
+ 	  team->implicit_task[i].icv.bind_var = bind_var;
++	  nthr->task->taskgroup = taskgroup;
+ 	  nthr->fn = fn;
+ 	  nthr->data = data;
+ 	  team->ordered_release[i] = &nthr->release;
+@@ -712,19 +732,17 @@ gomp_team_start (void (*fn) (void *), vo
+     {
+       size_t stacksize;
+       pthread_attr_init (&thread_attr);
+-      pthread_attr_setdetachstate (&thread_attr, PTHREAD_CREATE_DETACHED);
+       if (! pthread_attr_getstacksize (&gomp_thread_attr, &stacksize))
+ 	pthread_attr_setstacksize (&thread_attr, stacksize);
+       attr = &thread_attr;
+     }
+ 
+   start_data = gomp_alloca (sizeof (struct gomp_thread_start_data)
+-			    * (nthreads-i));
++			    * (nthreads - i));
+ 
+   /* Launch new threads.  */
+   for (; i < nthreads; ++i)
+     {
+-      pthread_t pt;
+       int err;
+ 
+       start_data->ts.place_partition_off = thr->ts.place_partition_off;
+@@ -810,11 +828,14 @@ gomp_team_start (void (*fn) (void *), vo
+       gomp_init_task (start_data->task, task, icv);
+       team->implicit_task[i].icv.nthreads_var = nthreads_var;
+       team->implicit_task[i].icv.bind_var = bind_var;
++      start_data->task->taskgroup = taskgroup;
+       start_data->thread_pool = pool;
+       start_data->nested = nested;
+ 
+       attr = gomp_adjust_thread_attr (attr, &thread_attr);
+-      err = pthread_create (&pt, attr, gomp_thread_start, start_data++);
++      err = pthread_create (&start_data->handle, attr, gomp_thread_start,
++			    start_data);
++      start_data++;
+       if (err != 0)
+ 	gomp_fatal ("Thread creation failed: %s", strerror (err));
+     }
+@@ -854,6 +875,42 @@ gomp_team_start (void (*fn) (void *), vo
+       gomp_mutex_unlock (&gomp_managed_threads_lock);
+ #endif
+     }
++  if (__builtin_expect (gomp_display_affinity_var, 0))
++    {
++      if (nested
++	  || nthreads != old_threads_used
++	  || force_display)
++	{
++	  gomp_display_affinity_thread (gomp_thread_self (), &thr->ts,
++					thr->place);
++	  if (nested)
++	    {
++	      start_data -= nthreads - 1;
++	      for (i = 1; i < nthreads; ++i)
++		{
++		  gomp_display_affinity_thread (
++#ifdef LIBGOMP_USE_PTHREADS
++						start_data->handle,
++#else
++						gomp_thread_self (),
++#endif
++						&start_data->ts,
++						start_data->place);
++		  start_data++;
++		}
++	    }
++	  else
++	    {
++	      for (i = 1; i < nthreads; ++i)
++		{
++		  gomp_thread_handle handle
++		    = gomp_thread_to_pthread_t (pool->threads[i]);
++		  gomp_display_affinity_thread (handle, &pool->threads[i]->ts,
++						pool->threads[i]->place);
++		}
++	    }
++	}
++    }
+   if (__builtin_expect (affinity_thr != NULL, 0)
+       && team->prev_ts.place_partition_len > 64)
+     free (affinity_thr);
+@@ -894,7 +951,7 @@ gomp_team_end (void)
+   gomp_end_task ();
+   thr->ts = team->prev_ts;
+ 
+-  if (__builtin_expect (thr->ts.team != NULL, 0))
++  if (__builtin_expect (thr->ts.level != 0, 0))
+     {
+ #ifdef HAVE_SYNC_BUILTINS
+       __sync_fetch_and_add (&gomp_managed_threads, 1L - team->nthreads);
+@@ -959,6 +1016,76 @@ team_destructor (void)
+      crashes.  */
+   pthread_key_delete (gomp_thread_destructor);
+ }
++
++/* Similar to gomp_free_pool_helper, but doesn't detach itself;
++   gomp_pause_host will pthread_join those threads.  */
++
++static void
++gomp_pause_pool_helper (void *thread_pool)
++{
++  struct gomp_thread *thr = gomp_thread ();
++  struct gomp_thread_pool *pool
++    = (struct gomp_thread_pool *) thread_pool;
++  gomp_simple_barrier_wait_last (&pool->threads_dock);
++  gomp_sem_destroy (&thr->release);
++  thr->thread_pool = NULL;
++  thr->task = NULL;
++  pthread_exit (NULL);
++}
++
++/* Free a thread pool and release its threads.  Return non-zero on
++   failure.  */
++
++int
++gomp_pause_host (void)
++{
++  struct gomp_thread *thr = gomp_thread ();
++  struct gomp_thread_pool *pool = thr->thread_pool;
++  if (thr->ts.level)
++    return -1;
++  if (pool)
++    {
++      if (pool->threads_used > 0)
++	{
++	  int i;
++	  pthread_t *thrs
++	    = gomp_alloca (sizeof (pthread_t) * pool->threads_used);
++	  for (i = 1; i < pool->threads_used; i++)
++	    {
++	      struct gomp_thread *nthr = pool->threads[i];
++	      nthr->fn = gomp_pause_pool_helper;
++	      nthr->data = pool;
++	      thrs[i] = gomp_thread_to_pthread_t (nthr);
++	    }
++	  /* This barrier undocks threads docked on pool->threads_dock.  */
++	  gomp_simple_barrier_wait (&pool->threads_dock);
++	  /* And this waits till all threads have called
++	     gomp_simple_barrier_wait_last in gomp_pause_pool_helper.  */
++	  gomp_simple_barrier_wait (&pool->threads_dock);
++	  /* Now it is safe to destroy the barrier and free the pool.  */
++	  gomp_simple_barrier_destroy (&pool->threads_dock);
++
++#ifdef HAVE_SYNC_BUILTINS
++	  __sync_fetch_and_add (&gomp_managed_threads,
++				1L - pool->threads_used);
++#else
++	  gomp_mutex_lock (&gomp_managed_threads_lock);
++	  gomp_managed_threads -= pool->threads_used - 1L;
++	  gomp_mutex_unlock (&gomp_managed_threads_lock);
++#endif
++	  for (i = 1; i < pool->threads_used; i++)
++	    pthread_join (thrs[i], NULL);
++	}
++      if (pool->last_team)
++	free_team (pool->last_team);
++#ifndef __nvptx__
++      free (pool->threads);
++      free (pool);
++#endif
++      thr->thread_pool = NULL;
++    }
++  return 0;
++}
+ #endif
+ 
+ struct gomp_task_icv *
+--- libgomp/libgomp.h.jj	2018-04-25 09:40:31.925655587 +0200
++++ libgomp/libgomp.h	2019-05-07 19:01:51.285535999 +0200
+@@ -44,6 +44,7 @@
+ #include "config.h"
+ #include "gstdint.h"
+ #include "libgomp-plugin.h"
++#include "gomp-constants.h"
+ 
+ #ifdef HAVE_PTHREAD_H
+ #include <pthread.h>
+@@ -85,9 +86,21 @@ enum memmodel
+ 
+ /* alloc.c */
+ 
++#if defined(HAVE_ALIGNED_ALLOC) \
++    || defined(HAVE__ALIGNED_MALLOC) \
++    || defined(HAVE_POSIX_MEMALIGN) \
++    || defined(HAVE_MEMALIGN)
++/* Defined if gomp_aligned_alloc doesn't use fallback version
++   and free can be used instead of gomp_aligned_free.  */
++#define GOMP_HAVE_EFFICIENT_ALIGNED_ALLOC 1
++#endif
++
+ extern void *gomp_malloc (size_t) __attribute__((malloc));
+ extern void *gomp_malloc_cleared (size_t) __attribute__((malloc));
+ extern void *gomp_realloc (void *, size_t);
++extern void *gomp_aligned_alloc (size_t, size_t)
++  __attribute__((malloc, alloc_size (2)));
++extern void gomp_aligned_free (void *);
+ 
+ /* Avoid conflicting prototypes of alloca() in system headers by using
+    GCC's builtin alloca().  */
+@@ -137,7 +150,8 @@ enum gomp_schedule_type
+   GFS_STATIC,
+   GFS_DYNAMIC,
+   GFS_GUIDED,
+-  GFS_AUTO
++  GFS_AUTO,
++  GFS_MONOTONIC = 0x80000000U
+ };
+ 
+ struct gomp_doacross_work_share
+@@ -174,6 +188,8 @@ struct gomp_doacross_work_share
+     /* Likewise, but for the ull implementation.  */
+     unsigned long long boundary_ull;
+   };
++  /* Pointer to extra memory if needed for lastprivate(conditional).  */
++  void *extra;
+   /* Array of shift counts for each dimension if they can be flattened.  */
+   unsigned int shift_counts[];
+ };
+@@ -275,6 +291,9 @@ struct gomp_work_share
+     struct gomp_work_share *next_free;
+   };
+ 
++  /* Task reductions for this work-sharing construct.  */
++  uintptr_t *task_reductions;
++
+   /* If only few threads are in the team, ordered_team_ids can point
+      to this array which fills the padding at the end of this struct.  */
+   unsigned inline_ordered_team_ids[0];
+@@ -365,8 +384,12 @@ extern void **gomp_places_list;
+ extern unsigned long gomp_places_list_len;
+ extern unsigned int gomp_num_teams_var;
+ extern int gomp_debug_var;
++extern bool gomp_display_affinity_var;
++extern char *gomp_affinity_format_var;
++extern size_t gomp_affinity_format_len;
+ extern int goacc_device_num;
+ extern char *goacc_device_type;
++extern int goacc_default_dims[GOMP_DIM_MAX];
+ 
+ enum gomp_task_kind
+ {
+@@ -469,8 +492,10 @@ struct gomp_taskgroup
+   struct gomp_taskgroup *prev;
+   /* Queue of tasks that belong in this taskgroup.  */
+   struct priority_queue taskgroup_queue;
++  uintptr_t *reductions;
+   bool in_taskgroup_wait;
+   bool cancelled;
++  bool workshare;
+   gomp_sem_t taskgroup_sem;
+   size_t num_children;
+ };
+@@ -613,6 +638,19 @@ struct gomp_thread
+ 
+   /* User pthread thread pool */
+   struct gomp_thread_pool *thread_pool;
++
++#if defined(LIBGOMP_USE_PTHREADS) \
++    && (!defined(HAVE_TLS) \
++	|| !defined(__GLIBC__) \
++	|| !defined(USING_INITIAL_EXEC_TLS))
++  /* pthread_t of the thread containing this gomp_thread.
++     On Linux when using initial-exec TLS,
++     (typeof (pthread_t)) gomp_thread () - pthread_self ()
++     is constant in all threads, so we can optimize and not
++     store it.  */
++#define GOMP_NEEDS_THREAD_HANDLE 1
++  pthread_t handle;
++#endif
+ };
+ 
+ 
+@@ -709,6 +747,25 @@ extern bool gomp_affinity_finalize_place
+ extern bool gomp_affinity_init_level (int, unsigned long, bool);
+ extern void gomp_affinity_print_place (void *);
+ extern void gomp_get_place_proc_ids_8 (int, int64_t *);
++extern void gomp_display_affinity_place (char *, size_t, size_t *, int);
++
++/* affinity-fmt.c */
++
++extern void gomp_print_string (const char *str, size_t len);
++extern void gomp_set_affinity_format (const char *, size_t);
++extern void gomp_display_string (char *, size_t, size_t *, const char *,
++				 size_t);
++#ifdef LIBGOMP_USE_PTHREADS
++typedef pthread_t gomp_thread_handle;
++#else
++typedef struct {} gomp_thread_handle;
++#endif
++extern size_t gomp_display_affinity (char *, size_t, const char *,
++				     gomp_thread_handle,
++				     struct gomp_team_state *, unsigned int);
++extern void gomp_display_affinity_thread (gomp_thread_handle,
++					  struct gomp_team_state *,
++					  unsigned int) __attribute__((cold));
+ 
+ /* iter.c */
+ 
+@@ -745,9 +802,9 @@ extern void gomp_ordered_next (void);
+ extern void gomp_ordered_static_init (void);
+ extern void gomp_ordered_static_next (void);
+ extern void gomp_ordered_sync (void);
+-extern void gomp_doacross_init (unsigned, long *, long);
++extern void gomp_doacross_init (unsigned, long *, long, size_t);
+ extern void gomp_doacross_ull_init (unsigned, unsigned long long *,
+-				    unsigned long long);
++				    unsigned long long, size_t);
+ 
+ /* parallel.c */
+ 
+@@ -770,6 +827,10 @@ extern bool gomp_create_target_task (str
+ 				     size_t *, unsigned short *, unsigned int,
+ 				     void **, void **,
+ 				     enum gomp_target_task_state);
++extern struct gomp_taskgroup *gomp_parallel_reduction_register (uintptr_t *,
++								unsigned);
++extern void gomp_workshare_taskgroup_start (void);
++extern void gomp_workshare_task_reduction_register (uintptr_t *, uintptr_t *);
+ 
+ static void inline
+ gomp_finish_task (struct gomp_task *task)
+@@ -782,9 +843,11 @@ gomp_finish_task (struct gomp_task *task
+ 
+ extern struct gomp_team *gomp_new_team (unsigned);
+ extern void gomp_team_start (void (*) (void *), void *, unsigned,
+-			     unsigned, struct gomp_team *);
++			     unsigned, struct gomp_team *,
++			     struct gomp_taskgroup *);
+ extern void gomp_team_end (void);
+ extern void gomp_free_thread (void *);
++extern int gomp_pause_host (void);
+ 
+ /* target.c */
+ 
+@@ -851,6 +914,8 @@ struct splay_tree_key_s {
+   uintptr_t tgt_offset;
+   /* Reference count.  */
+   uintptr_t refcount;
++  /* Dynamic reference count.  */
++  uintptr_t dynamic_refcount;
+   /* Pointer to the original mapping of "omp declare target link" object.  */
+   splay_tree_key link_key;
+ };
+@@ -989,7 +1054,9 @@ enum gomp_map_vars_kind
+ };
+ 
+ extern void gomp_acc_insert_pointer (size_t, void **, size_t *, void *);
+-extern void gomp_acc_remove_pointer (void *, bool, int, int);
++extern void gomp_acc_remove_pointer (void *, size_t, bool, int, int, int);
++extern void gomp_acc_declare_allocate (bool, size_t, void **, size_t *,
++				       unsigned short *);
+ 
+ extern struct target_mem_desc *gomp_map_vars (struct gomp_device_descr *,
+ 					      size_t, void **, void **,
+@@ -999,12 +1066,13 @@ extern void gomp_unmap_vars (struct targ
+ extern void gomp_init_device (struct gomp_device_descr *);
+ extern void gomp_free_memmap (struct splay_tree_s *);
+ extern void gomp_unload_device (struct gomp_device_descr *);
++extern bool gomp_remove_var (struct gomp_device_descr *, splay_tree_key);
+ 
+ /* work.c */
+ 
+-extern void gomp_init_work_share (struct gomp_work_share *, bool, unsigned);
++extern void gomp_init_work_share (struct gomp_work_share *, size_t, unsigned);
+ extern void gomp_fini_work_share (struct gomp_work_share *);
+-extern bool gomp_work_share_start (bool);
++extern bool gomp_work_share_start (size_t);
+ extern void gomp_work_share_end (void);
+ extern bool gomp_work_share_end_cancel (void);
+ extern void gomp_work_share_end_nowait (void);
+@@ -1028,6 +1096,14 @@ gomp_work_share_init_done (void)
+ #include "omp-lock.h"
+ #define _LIBGOMP_OMP_LOCK_DEFINED 1
+ #include "omp.h.in"
++#define omp_sched_monotonic 0x80000000U
++typedef enum omp_pause_resource_t
++{
++  omp_pause_soft = 1,
++  omp_pause_hard = 2
++} omp_pause_resource_t;
++extern int omp_pause_resource (omp_pause_resource_t, int) __GOMP_NOTHROW;
++extern int omp_pause_resource_all (omp_pause_resource_t) __GOMP_NOTHROW;
+ 
+ #if !defined (HAVE_ATTRIBUTE_VISIBILITY) \
+     || !defined (HAVE_ATTRIBUTE_ALIAS) \
+@@ -1082,16 +1158,26 @@ extern int gomp_test_nest_lock_25 (omp_n
+ # define attribute_hidden
+ #endif
+ 
++#if __GNUC__ >= 9
++#  define HAVE_ATTRIBUTE_COPY
++#endif
++
++#ifdef HAVE_ATTRIBUTE_COPY
++# define attribute_copy(arg) __attribute__ ((copy (arg)))
++#else
++# define attribute_copy(arg)
++#endif
++
+ #ifdef HAVE_ATTRIBUTE_ALIAS
+ # define strong_alias(fn, al) \
+-  extern __typeof (fn) al __attribute__ ((alias (#fn)));
++  extern __typeof (fn) al __attribute__ ((alias (#fn))) attribute_copy (fn);
+ 
+ # define ialias_ulp	ialias_str1(__USER_LABEL_PREFIX__)
+ # define ialias_str1(x)	ialias_str2(x)
+ # define ialias_str2(x)	#x
+ # define ialias(fn) \
+   extern __typeof (fn) gomp_ialias_##fn \
+-    __attribute__ ((alias (#fn))) attribute_hidden;
++    __attribute__ ((alias (#fn))) attribute_hidden attribute_copy (fn);
+ # define ialias_redirect(fn) \
+   extern __typeof (fn) fn __asm__ (ialias_ulp "gomp_ialias_" #fn) attribute_hidden;
+ # define ialias_call(fn) gomp_ialias_ ## fn
+@@ -1131,4 +1217,42 @@ task_to_priority_node (enum priority_que
+   return (struct priority_node *) ((char *) task
+ 				   + priority_queue_offset (type));
+ }
++
++#ifdef LIBGOMP_USE_PTHREADS
++static inline gomp_thread_handle
++gomp_thread_self (void)
++{
++  return pthread_self ();
++}
++
++static inline gomp_thread_handle
++gomp_thread_to_pthread_t (struct gomp_thread *thr)
++{
++  struct gomp_thread *this_thr = gomp_thread ();
++  if (thr == this_thr)
++    return pthread_self ();
++#ifdef GOMP_NEEDS_THREAD_HANDLE
++  return thr->handle;
++#else
++  /* On Linux with initial-exec TLS, the pthread_t of the thread containing
++     thr can be computed from thr, this_thr and pthread_self (),
++     as the distance between this_thr and pthread_self () is constant.  */
++  return pthread_self () + ((uintptr_t) thr - (uintptr_t) this_thr);
++#endif
++}
++#else
++static inline gomp_thread_handle
++gomp_thread_self (void)
++{
++  return (gomp_thread_handle) {};
++}
++
++static inline gomp_thread_handle
++gomp_thread_to_pthread_t (struct gomp_thread *thr)
++{
++  (void) thr;
++  return gomp_thread_self ();
++}
++#endif
++
+ #endif /* LIBGOMP_H */
+--- libgomp/oacc-parallel.c.jj	2018-04-25 09:40:31.319655306 +0200
++++ libgomp/oacc-parallel.c	2019-05-07 19:09:47.010991153 +0200
+@@ -27,6 +27,8 @@
+ /* This file handles OpenACC constructs.  */
+ 
+ #include "openacc.h"
++void acc_copyout_finalize (void *, size_t) __GOACC_NOTHROW;
++void acc_delete_finalize (void *, size_t) __GOACC_NOTHROW;
+ #include "libgomp.h"
+ #include "libgomp_g.h"
+ #include "gomp-constants.h"
+@@ -38,31 +40,95 @@
+ #include <stdarg.h>
+ #include <assert.h>
+ 
++
++/* In the ABI, the GOACC_FLAGs are encoded as an inverted bitmask, so that we
++   continue to support the following two legacy values.  */
++_Static_assert (GOACC_FLAGS_UNMARSHAL (GOMP_DEVICE_ICV) == 0,
++		"legacy GOMP_DEVICE_ICV broken");
++_Static_assert (GOACC_FLAGS_UNMARSHAL (GOMP_DEVICE_HOST_FALLBACK)
++		== GOACC_FLAG_HOST_FALLBACK,
++		"legacy GOMP_DEVICE_HOST_FALLBACK broken");
++
++
++/* Return the number of mappings in the group starting at POS: three if the
++   next mapping is a PSET, two if it is a pointer, and zero otherwise.  */
++
+ static int
+-find_pset (int pos, size_t mapnum, unsigned short *kinds)
++find_pointer (int pos, size_t mapnum, unsigned short *kinds)
+ {
+   if (pos + 1 >= mapnum)
+     return 0;
+ 
+   unsigned char kind = kinds[pos+1] & 0xff;
+ 
+-  return kind == GOMP_MAP_TO_PSET;
++  if (kind == GOMP_MAP_TO_PSET)
++    return 3;
++  else if (kind == GOMP_MAP_POINTER)
++    return 2;
++
++  return 0;
++}
++
++/* Handle the mapping pairs that are presented when a
++   deviceptr clause is used with Fortran.  */
++
++static void
++handle_ftn_pointers (size_t mapnum, void **hostaddrs, size_t *sizes,
++		     unsigned short *kinds)
++{
++  int i;
++
++  for (i = 0; i < mapnum; i++)
++    {
++      unsigned short kind1 = kinds[i] & 0xff;
++
++      /* Handle Fortran deviceptr clause.  */
++      if (kind1 == GOMP_MAP_FORCE_DEVICEPTR)
++	{
++	  unsigned short kind2;
++
++	  if (i < (signed)mapnum - 1)
++	    kind2 = kinds[i + 1] & 0xff;
++	  else
++	    kind2 = 0xffff;
++
++	  if (sizes[i] == sizeof (void *))
++	    continue;
++
++	  /* At this point, we're dealing with a Fortran deviceptr.
++	     If the next element is not what we're expecting, then
++	     this is an instance of where the deviceptr variable was
++	     not used within the region and the pointer was removed
++	     by the gimplifier.  */
++	  if (kind2 == GOMP_MAP_POINTER
++	      && sizes[i + 1] == 0
++	      && hostaddrs[i] == *(void **)hostaddrs[i + 1])
++	    {
++	      kinds[i+1] = kinds[i];
++	      sizes[i+1] = sizeof (void *);
++	    }
++
++	  /* Invalidate the entry.  */
++	  hostaddrs[i] = NULL;
++	}
++    }
+ }
+ 
+ static void goacc_wait (int async, int num_waits, va_list *ap);
+ 
+ 
+-/* Launch a possibly offloaded function on DEVICE.  FN is the host fn
++/* Launch a possibly offloaded function with FLAGS.  FN is the host fn
+    address.  MAPNUM, HOSTADDRS, SIZES & KINDS  describe the memory
+    blocks to be copied to/from the device.  Varadic arguments are
+    keyed optional parameters terminated with a zero.  */
+ 
+ void
+-GOACC_parallel_keyed (int device, void (*fn) (void *),
++GOACC_parallel_keyed (int flags_m, void (*fn) (void *),
+ 		      size_t mapnum, void **hostaddrs, size_t *sizes,
+ 		      unsigned short *kinds, ...)
+ {
+-  bool host_fallback = device == GOMP_DEVICE_HOST_FALLBACK;
++  int flags = GOACC_FLAGS_UNMARSHAL (flags_m);
++
+   va_list ap;
+   struct goacc_thread *thr;
+   struct gomp_device_descr *acc_dev;
+@@ -88,9 +154,11 @@ GOACC_parallel_keyed (int device, void (
+   thr = goacc_thread ();
+   acc_dev = thr->dev;
+ 
++  handle_ftn_pointers (mapnum, hostaddrs, sizes, kinds);
++
+   /* Host fallback if "if" clause is false or if the current device is set to
+      the host.  */
+-  if (host_fallback)
++  if (flags & GOACC_FLAG_HOST_FALLBACK)
+     {
+       goacc_save_and_set_bind (acc_device_host);
+       fn (hostaddrs);
+@@ -140,9 +208,7 @@ GOACC_parallel_keyed (int device, void (
+ 	case GOMP_LAUNCH_WAIT:
+ 	  {
+ 	    unsigned num_waits = GOMP_LAUNCH_OP (tag);
+-
+-	    if (num_waits)
+-	      goacc_wait (async, num_waits, &ap);
++	    goacc_wait (async, num_waits, &ap);
+ 	    break;
+ 	  }
+ 
+@@ -177,16 +243,36 @@ GOACC_parallel_keyed (int device, void (
+   devaddrs = gomp_alloca (sizeof (void *) * mapnum);
+   for (i = 0; i < mapnum; i++)
+     devaddrs[i] = (void *) (tgt->list[i].key->tgt->tgt_start
+-			    + tgt->list[i].key->tgt_offset);
++			    + tgt->list[i].key->tgt_offset
++			    + tgt->list[i].offset);
+ 
+   acc_dev->openacc.exec_func (tgt_fn, mapnum, hostaddrs, devaddrs,
+ 			      async, dims, tgt);
+ 
+   /* If running synchronously, unmap immediately.  */
+-  if (async < acc_async_noval)
++  bool copyfrom = true;
++  if (async_synchronous_p (async))
+     gomp_unmap_vars (tgt, true);
+   else
+-    tgt->device_descr->openacc.register_async_cleanup_func (tgt, async);
++    {
++      bool async_unmap = false;
++      for (size_t i = 0; i < tgt->list_count; i++)
++	{
++	  splay_tree_key k = tgt->list[i].key;
++	  if (k && k->refcount == 1)
++	    {
++	      async_unmap = true;
++	      break;
++	    }
++	}
++      if (async_unmap)
++	tgt->device_descr->openacc.register_async_cleanup_func (tgt, async);
++      else
++	{
++	  copyfrom = false;
++	  gomp_unmap_vars (tgt, copyfrom);
++	}
++    }
+ 
+   acc_dev->openacc.async_set_async_func (acc_async_sync);
+ }
+@@ -194,7 +280,7 @@ GOACC_parallel_keyed (int device, void (
+ /* Legacy entry point, only provide host execution.  */
+ 
+ void
+-GOACC_parallel (int device, void (*fn) (void *),
++GOACC_parallel (int flags_m, void (*fn) (void *),
+ 		size_t mapnum, void **hostaddrs, size_t *sizes,
+ 		unsigned short *kinds,
+ 		int num_gangs, int num_workers, int vector_length,
+@@ -206,10 +292,11 @@ GOACC_parallel (int device, void (*fn) (
+ }
+ 
+ void
+-GOACC_data_start (int device, size_t mapnum,
++GOACC_data_start (int flags_m, size_t mapnum,
+ 		  void **hostaddrs, size_t *sizes, unsigned short *kinds)
+ {
+-  bool host_fallback = device == GOMP_DEVICE_HOST_FALLBACK;
++  int flags = GOACC_FLAGS_UNMARSHAL (flags_m);
++
+   struct target_mem_desc *tgt;
+ 
+ #ifdef HAVE_INTTYPES_H
+@@ -227,7 +314,7 @@ GOACC_data_start (int device, size_t map
+ 
+   /* Host fallback or 'do nothing'.  */
+   if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
+-      || host_fallback)
++      || (flags & GOACC_FLAG_HOST_FALLBACK))
+     {
+       tgt = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, true,
+ 			   GOMP_MAP_VARS_OPENACC);
+@@ -258,13 +345,14 @@ GOACC_data_end (void)
+ }
+ 
+ void
+-GOACC_enter_exit_data (int device, size_t mapnum,
++GOACC_enter_exit_data (int flags_m, size_t mapnum,
+ 		       void **hostaddrs, size_t *sizes, unsigned short *kinds,
+ 		       int async, int num_waits, ...)
+ {
++  int flags = GOACC_FLAGS_UNMARSHAL (flags_m);
++
+   struct goacc_thread *thr;
+   struct gomp_device_descr *acc_dev;
+-  bool host_fallback = device == GOMP_DEVICE_HOST_FALLBACK;
+   bool data_enter = false;
+   size_t i;
+ 
+@@ -274,7 +362,7 @@ GOACC_enter_exit_data (int device, size_
+   acc_dev = thr->dev;
+ 
+   if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
+-      || host_fallback)
++      || (flags & GOACC_FLAG_HOST_FALLBACK))
+     return;
+ 
+   if (num_waits)
+@@ -286,6 +374,17 @@ GOACC_enter_exit_data (int device, size_
+       va_end (ap);
+     }
+ 
++  /* Determine whether "finalize" semantics apply to all mappings of this
++     OpenACC directive.  */
++  bool finalize = false;
++  if (mapnum > 0)
++    {
++      unsigned char kind = kinds[0] & 0xff;
++      if (kind == GOMP_MAP_DELETE
++	  || kind == GOMP_MAP_FORCE_FROM)
++	finalize = true;
++    }
++
+   acc_dev->openacc.async_set_async_func (async);
+ 
+   /* Determine if this is an "acc enter data".  */
+@@ -298,13 +397,17 @@ GOACC_enter_exit_data (int device, size_
+ 
+       if (kind == GOMP_MAP_FORCE_ALLOC
+ 	  || kind == GOMP_MAP_FORCE_PRESENT
+-	  || kind == GOMP_MAP_FORCE_TO)
++	  || kind == GOMP_MAP_FORCE_TO
++	  || kind == GOMP_MAP_TO
++	  || kind == GOMP_MAP_ALLOC)
+ 	{
+ 	  data_enter = true;
+ 	  break;
+ 	}
+ 
+-      if (kind == GOMP_MAP_DELETE
++      if (kind == GOMP_MAP_RELEASE
++	  || kind == GOMP_MAP_DELETE
++	  || kind == GOMP_MAP_FROM
+ 	  || kind == GOMP_MAP_FORCE_FROM)
+ 	break;
+ 
+@@ -312,31 +415,35 @@ GOACC_enter_exit_data (int device, size_
+ 		      kind);
+     }
+ 
++  /* In C, non-pointers and arrays are represented by a single data clause.
++     Dynamically allocated arrays and subarrays are represented by a data
++     clause followed by an internal GOMP_MAP_POINTER.
++
++     In Fortran, scalars and non-allocated arrays are represented by a
++     single data clause.  Allocated arrays and subarrays have three mappings:
++     1) the original data clause, 2) a PSET, and 3) a pointer to the array
++     data.  */
++
+   if (data_enter)
+     {
+       for (i = 0; i < mapnum; i++)
+ 	{
+ 	  unsigned char kind = kinds[i] & 0xff;
+ 
+-	  /* Scan for PSETs.  */
+-	  int psets = find_pset (i, mapnum, kinds);
++	  /* Scan for pointers and PSETs.  */
++	  int pointer = find_pointer (i, mapnum, kinds);
+ 
+-	  if (!psets)
++	  if (!pointer)
+ 	    {
+ 	      switch (kind)
+ 		{
+-		case GOMP_MAP_POINTER:
+-		  gomp_acc_insert_pointer (1, &hostaddrs[i], &sizes[i],
+-					&kinds[i]);
+-		  break;
++		case GOMP_MAP_ALLOC:
+ 		case GOMP_MAP_FORCE_ALLOC:
+ 		  acc_create (hostaddrs[i], sizes[i]);
+ 		  break;
+-		case GOMP_MAP_FORCE_PRESENT:
+-		  acc_present_or_copyin (hostaddrs[i], sizes[i]);
+-		  break;
++		case GOMP_MAP_TO:
+ 		case GOMP_MAP_FORCE_TO:
+-		  acc_present_or_copyin (hostaddrs[i], sizes[i]);
++		  acc_copyin (hostaddrs[i], sizes[i]);
+ 		  break;
+ 		default:
+ 		  gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x",
+@@ -346,12 +453,13 @@ GOACC_enter_exit_data (int device, size_
+ 	    }
+ 	  else
+ 	    {
+-	      gomp_acc_insert_pointer (3, &hostaddrs[i], &sizes[i], &kinds[i]);
++	      gomp_acc_insert_pointer (pointer, &hostaddrs[i],
++				       &sizes[i], &kinds[i]);
+ 	      /* Increment 'i' by two because OpenACC requires fortran
+ 		 arrays to be contiguous, so each PSET is associated with
+ 		 one of MAP_FORCE_ALLOC/MAP_FORCE_PRESET/MAP_FORCE_TO, and
+ 		 one MAP_POINTER.  */
+-	      i += 2;
++	      i += pointer - 1;
+ 	    }
+ 	}
+     }
+@@ -360,22 +468,28 @@ GOACC_enter_exit_data (int device, size_
+       {
+ 	unsigned char kind = kinds[i] & 0xff;
+ 
+-	int psets = find_pset (i, mapnum, kinds);
++	int pointer = find_pointer (i, mapnum, kinds);
+ 
+-	if (!psets)
++	if (!pointer)
+ 	  {
+ 	    switch (kind)
+ 	      {
+-	      case GOMP_MAP_POINTER:
+-		gomp_acc_remove_pointer (hostaddrs[i], (kinds[i] & 0xff)
+-					 == GOMP_MAP_FORCE_FROM,
+-					 async, 1);
+-		break;
++	      case GOMP_MAP_RELEASE:
+ 	      case GOMP_MAP_DELETE:
+-		acc_delete (hostaddrs[i], sizes[i]);
++		if (acc_is_present (hostaddrs[i], sizes[i]))
++		  {
++		    if (finalize)
++		      acc_delete_finalize (hostaddrs[i], sizes[i]);
++		    else
++		      acc_delete (hostaddrs[i], sizes[i]);
++		  }
+ 		break;
++	      case GOMP_MAP_FROM:
+ 	      case GOMP_MAP_FORCE_FROM:
+-		acc_copyout (hostaddrs[i], sizes[i]);
++		if (finalize)
++		  acc_copyout_finalize (hostaddrs[i], sizes[i]);
++		else
++		  acc_copyout (hostaddrs[i], sizes[i]);
+ 		break;
+ 	      default:
+ 		gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x",
+@@ -385,10 +499,12 @@ GOACC_enter_exit_data (int device, size_
+ 	  }
+ 	else
+ 	  {
+-	    gomp_acc_remove_pointer (hostaddrs[i], (kinds[i] & 0xff)
+-				     == GOMP_MAP_FORCE_FROM, async, 3);
++	    bool copyfrom = (kind == GOMP_MAP_FORCE_FROM
++			     || kind == GOMP_MAP_FROM);
++	    gomp_acc_remove_pointer (hostaddrs[i], sizes[i], copyfrom, async,
++				     finalize, pointer);
+ 	    /* See the above comment.  */
+-	    i += 2;
++	    i += pointer - 1;
+ 	  }
+       }
+ 
+@@ -398,13 +514,20 @@ GOACC_enter_exit_data (int device, size_
+ static void
+ goacc_wait (int async, int num_waits, va_list *ap)
+ {
+-  struct goacc_thread *thr = goacc_thread ();
+-  struct gomp_device_descr *acc_dev = thr->dev;
+-
+   while (num_waits--)
+     {
+       int qid = va_arg (*ap, int);
+-      
++
++      /* Waiting on ACC_ASYNC_NOVAL maps to 'wait all'.  */
++      if (qid == acc_async_noval)
++	{
++	  if (async == acc_async_sync)
++	    acc_wait_all ();
++	  else
++	    acc_wait_all_async (async);
++	  break;
++	}
++
+       if (acc_async_test (qid))
+ 	continue;
+ 
+@@ -415,16 +538,17 @@ goacc_wait (int async, int num_waits, va
+ 	    launching on, the queue itself will order work as
+ 	    required, so there's no need to wait explicitly.  */
+       else
+-	acc_dev->openacc.async_wait_async_func (qid, async);
++	acc_wait_async (qid, async);
+     }
+ }
+ 
+ void
+-GOACC_update (int device, size_t mapnum,
++GOACC_update (int flags_m, size_t mapnum,
+ 	      void **hostaddrs, size_t *sizes, unsigned short *kinds,
+ 	      int async, int num_waits, ...)
+ {
+-  bool host_fallback = device == GOMP_DEVICE_HOST_FALLBACK;
++  int flags = GOACC_FLAGS_UNMARSHAL (flags_m);
++
+   size_t i;
+ 
+   goacc_lazy_initialize ();
+@@ -433,7 +557,7 @@ GOACC_update (int device, size_t mapnum,
+   struct gomp_device_descr *acc_dev = thr->dev;
+ 
+   if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
+-      || host_fallback)
++      || (flags & GOACC_FLAG_HOST_FALLBACK))
+     return;
+ 
+   if (num_waits)
+@@ -447,6 +571,7 @@ GOACC_update (int device, size_t mapnum,
+ 
+   acc_dev->openacc.async_set_async_func (async);
+ 
++  bool update_device = false;
+   for (i = 0; i < mapnum; ++i)
+     {
+       unsigned char kind = kinds[i] & 0xff;
+@@ -457,11 +582,46 @@ GOACC_update (int device, size_t mapnum,
+ 	case GOMP_MAP_TO_PSET:
+ 	  break;
+ 
++	case GOMP_MAP_ALWAYS_POINTER:
++	  if (update_device)
++	    {
++	      /* Save the contents of the host pointer.  */
++	      void *dptr = acc_deviceptr (hostaddrs[i-1]);
++	      uintptr_t t = *(uintptr_t *) hostaddrs[i];
++
++	      /* Update the contents of the host pointer to reflect
++		 the value of the allocated device memory in the
++		 previous pointer.  */
++	      *(uintptr_t *) hostaddrs[i] = (uintptr_t)dptr;
++	      acc_update_device (hostaddrs[i], sizeof (uintptr_t));
++
++	      /* Restore the host pointer.  */
++	      *(uintptr_t *) hostaddrs[i] = t;
++	      update_device = false;
++	    }
++	  break;
++
++	case GOMP_MAP_TO:
++	  if (!acc_is_present (hostaddrs[i], sizes[i]))
++	    {
++	      update_device = false;
++	      break;
++	    }
++	  /* Fallthru  */
+ 	case GOMP_MAP_FORCE_TO:
++	  update_device = true;
+ 	  acc_update_device (hostaddrs[i], sizes[i]);
+ 	  break;
+ 
++	case GOMP_MAP_FROM:
++	  if (!acc_is_present (hostaddrs[i], sizes[i]))
++	    {
++	      update_device = false;
++	      break;
++	    }
++	  /* Fallthru  */
+ 	case GOMP_MAP_FORCE_FROM:
++	  update_device = false;
+ 	  acc_update_self (hostaddrs[i], sizes[i]);
+ 	  break;
+ 
+@@ -487,8 +647,8 @@ GOACC_wait (int async, int num_waits, ..
+     }
+   else if (async == acc_async_sync)
+     acc_wait_all ();
+-  else if (async == acc_async_noval)
+-    goacc_thread ()->dev->openacc.async_wait_all_async_func (acc_async_noval);
++  else
++    acc_wait_all_async (async);
+ }
+ 
+ int
+@@ -504,7 +664,7 @@ GOACC_get_thread_num (void)
+ }
+ 
+ void
+-GOACC_declare (int device, size_t mapnum,
++GOACC_declare (int flags_m, size_t mapnum,
+ 	       void **hostaddrs, size_t *sizes, unsigned short *kinds)
+ {
+   int i;
+@@ -522,9 +682,10 @@ GOACC_declare (int device, size_t mapnum
+ 	  case GOMP_MAP_FORCE_FROM:
+ 	  case GOMP_MAP_FORCE_TO:
+ 	  case GOMP_MAP_POINTER:
++	  case GOMP_MAP_RELEASE:
+ 	  case GOMP_MAP_DELETE:
+-	    GOACC_enter_exit_data (device, 1, &hostaddrs[i], &sizes[i],
+-				   &kinds[i], 0, 0);
++	    GOACC_enter_exit_data (flags_m, 1, &hostaddrs[i], &sizes[i],
++				   &kinds[i], GOMP_ASYNC_SYNC, 0);
+ 	    break;
+ 
+ 	  case GOMP_MAP_FORCE_DEVICEPTR:
+@@ -532,20 +693,19 @@ GOACC_declare (int device, size_t mapnum
+ 
+ 	  case GOMP_MAP_ALLOC:
+ 	    if (!acc_is_present (hostaddrs[i], sizes[i]))
+-	      GOACC_enter_exit_data (device, 1, &hostaddrs[i], &sizes[i],
+-				     &kinds[i], 0, 0);
++	      GOACC_enter_exit_data (flags_m, 1, &hostaddrs[i], &sizes[i],
++				     &kinds[i], GOMP_ASYNC_SYNC, 0);
+ 	    break;
+ 
+ 	  case GOMP_MAP_TO:
+-	    GOACC_enter_exit_data (device, 1, &hostaddrs[i], &sizes[i],
+-				   &kinds[i], 0, 0);
++	    GOACC_enter_exit_data (flags_m, 1, &hostaddrs[i], &sizes[i],
++				   &kinds[i], GOMP_ASYNC_SYNC, 0);
+ 
+ 	    break;
+ 
+ 	  case GOMP_MAP_FROM:
+-	    kinds[i] = GOMP_MAP_FORCE_FROM;
+-	    GOACC_enter_exit_data (device, 1, &hostaddrs[i], &sizes[i],
+-				   &kinds[i], 0, 0);
++	    GOACC_enter_exit_data (flags_m, 1, &hostaddrs[i], &sizes[i],
++				   &kinds[i], GOMP_ASYNC_SYNC, 0);
+ 	    break;
+ 
+ 	  case GOMP_MAP_FORCE_PRESENT:
+--- libgomp/openacc2.f90.jj	2019-05-07 19:54:18.828514375 +0200
++++ libgomp/openacc2.f90	2019-05-07 19:56:38.454296347 +0200
+@@ -0,0 +1,1502 @@
++!  OpenACC Runtime Library Definitions.
++
++!  Copyright (C) 2014-2019 Free Software Foundation, Inc.
++
++!  Contributed by Tobias Burnus <burnus@net-b.de>
++!              and Mentor Embedded.
++
++!  This file is part of the GNU Offloading and Multi Processing Library
++!  (libgomp).
++
++!  Libgomp is free software; you can redistribute it and/or modify it
++!  under the terms of the GNU General Public License as published by
++!  the Free Software Foundation; either version 3, or (at your option)
++!  any later version.
++
++!  Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
++!  WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
++!  FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
++!  more details.
++
++!  Under Section 7 of GPL version 3, you are granted additional
++!  permissions described in the GCC Runtime Library Exception, version
++!  3.1, as published by the Free Software Foundation.
++
++!  You should have received a copy of the GNU General Public License and
++!  a copy of the GCC Runtime Library Exception along with this program;
++!  see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
++!  <http://www.gnu.org/licenses/>.
++
++module openacc_kinds2
++  use iso_fortran_env, only: int32
++  implicit none
++
++  private :: int32
++  public :: acc_device_kind
++
++  integer, parameter :: acc_device_kind = int32
++
++  public :: acc_device_none, acc_device_default, acc_device_host
++  public :: acc_device_not_host, acc_device_nvidia
++
++  ! Keep in sync with include/gomp-constants.h.
++  integer (acc_device_kind), parameter :: acc_device_none = 0
++  integer (acc_device_kind), parameter :: acc_device_default = 1
++  integer (acc_device_kind), parameter :: acc_device_host = 2
++  ! integer (acc_device_kind), parameter :: acc_device_host_nonshm = 3 removed.
++  integer (acc_device_kind), parameter :: acc_device_not_host = 4
++  integer (acc_device_kind), parameter :: acc_device_nvidia = 5
++
++  public :: acc_handle_kind
++
++  integer, parameter :: acc_handle_kind = int32
++
++  public :: acc_async_noval, acc_async_sync
++
++  ! Keep in sync with include/gomp-constants.h.
++  integer (acc_handle_kind), parameter :: acc_async_noval = -1
++  integer (acc_handle_kind), parameter :: acc_async_sync = -2
++
++end module
++
++module openacc_internal2
++  use openacc_kinds2
++  implicit none
++
++  interface
++    function acc_get_num_devices_h (d)
++      import
++      integer acc_get_num_devices_h
++      integer (acc_device_kind) d
++    end function
++
++    subroutine acc_set_device_type_h (d)
++      import
++      integer (acc_device_kind) d
++    end subroutine
++
++    function acc_get_device_type_h ()
++      import
++      integer (acc_device_kind) acc_get_device_type_h
++    end function
++
++    subroutine acc_set_device_num_h (n, d)
++      import
++      integer n
++      integer (acc_device_kind) d
++    end subroutine
++
++    function acc_get_device_num_h (d)
++      import
++      integer acc_get_device_num_h
++      integer (acc_device_kind) d
++    end function
++
++    function acc_async_test_h (a)
++      logical acc_async_test_h
++      integer a
++    end function
++
++    function acc_async_test_all_h ()
++      logical acc_async_test_all_h
++    end function
++
++    subroutine acc_wait_h (a)
++      integer a
++    end subroutine
++
++    subroutine acc_wait_async_h (a1, a2)
++      integer a1, a2
++    end subroutine
++
++    subroutine acc_wait_all_h ()
++    end subroutine
++
++    subroutine acc_wait_all_async_h (a)
++      integer a
++    end subroutine
++
++    subroutine acc_init_h (d)
++      import
++      integer (acc_device_kind) d
++    end subroutine
++
++    subroutine acc_shutdown_h (d)
++      import
++      integer (acc_device_kind) d
++    end subroutine
++
++    function acc_on_device_h (d)
++      import
++      integer (acc_device_kind) d
++      logical acc_on_device_h
++    end function
++
++    subroutine acc_copyin_32_h (a, len)
++      use iso_c_binding, only: c_int32_t
++      !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++      type (*), dimension (*) :: a
++      integer (c_int32_t) len
++    end subroutine
++
++    subroutine acc_copyin_64_h (a, len)
++      use iso_c_binding, only: c_int64_t
++      !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++      type (*), dimension (*) :: a
++      integer (c_int64_t) len
++    end subroutine
++
++    subroutine acc_copyin_array_h (a)
++      type (*), dimension (..), contiguous :: a
++    end subroutine
++
++    subroutine acc_present_or_copyin_32_h (a, len)
++      use iso_c_binding, only: c_int32_t
++      !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++      type (*), dimension (*) :: a
++      integer (c_int32_t) len
++    end subroutine
++
++    subroutine acc_present_or_copyin_64_h (a, len)
++      use iso_c_binding, only: c_int64_t
++      !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++      type (*), dimension (*) :: a
++      integer (c_int64_t) len
++    end subroutine
++
++    subroutine acc_present_or_copyin_array_h (a)
++      type (*), dimension (..), contiguous :: a
++    end subroutine
++
++    subroutine acc_create_32_h (a, len)
++      use iso_c_binding, only: c_int32_t
++      !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++      type (*), dimension (*) :: a
++      integer (c_int32_t) len
++    end subroutine
++
++    subroutine acc_create_64_h (a, len)
++      use iso_c_binding, only: c_int64_t
++      !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++      type (*), dimension (*) :: a
++      integer (c_int64_t) len
++    end subroutine
++
++    subroutine acc_create_array_h (a)
++      type (*), dimension (..), contiguous :: a
++    end subroutine
++
++    subroutine acc_present_or_create_32_h (a, len)
++      use iso_c_binding, only: c_int32_t
++      !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++      type (*), dimension (*) :: a
++      integer (c_int32_t) len
++    end subroutine
++
++    subroutine acc_present_or_create_64_h (a, len)
++      use iso_c_binding, only: c_int64_t
++      !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++      type (*), dimension (*) :: a
++      integer (c_int64_t) len
++    end subroutine
++
++    subroutine acc_present_or_create_array_h (a)
++      type (*), dimension (..), contiguous :: a
++    end subroutine
++
++    subroutine acc_copyout_32_h (a, len)
++      use iso_c_binding, only: c_int32_t
++      !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++      type (*), dimension (*) :: a
++      integer (c_int32_t) len
++    end subroutine
++
++    subroutine acc_copyout_64_h (a, len)
++      use iso_c_binding, only: c_int64_t
++      !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++      type (*), dimension (*) :: a
++      integer (c_int64_t) len
++    end subroutine
++
++    subroutine acc_copyout_array_h (a)
++      type (*), dimension (..), contiguous :: a
++    end subroutine
++
++    subroutine acc_copyout_finalize_32_h (a, len)
++      use iso_c_binding, only: c_int32_t
++      !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++      type (*), dimension (*) :: a
++      integer (c_int32_t) len
++    end subroutine
++
++    subroutine acc_copyout_finalize_64_h (a, len)
++      use iso_c_binding, only: c_int64_t
++      !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++      type (*), dimension (*) :: a
++      integer (c_int64_t) len
++    end subroutine
++
++    subroutine acc_copyout_finalize_array_h (a)
++      type (*), dimension (..), contiguous :: a
++    end subroutine
++
++    subroutine acc_delete_32_h (a, len)
++      use iso_c_binding, only: c_int32_t
++      !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++      type (*), dimension (*) :: a
++      integer (c_int32_t) len
++    end subroutine
++
++    subroutine acc_delete_64_h (a, len)
++      use iso_c_binding, only: c_int64_t
++      !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++      type (*), dimension (*) :: a
++      integer (c_int64_t) len
++    end subroutine
++
++    subroutine acc_delete_array_h (a)
++      type (*), dimension (..), contiguous :: a
++    end subroutine
++
++    subroutine acc_delete_finalize_32_h (a, len)
++      use iso_c_binding, only: c_int32_t
++      !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++      type (*), dimension (*) :: a
++      integer (c_int32_t) len
++    end subroutine
++
++    subroutine acc_delete_finalize_64_h (a, len)
++      use iso_c_binding, only: c_int64_t
++      !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++      type (*), dimension (*) :: a
++      integer (c_int64_t) len
++    end subroutine
++
++    subroutine acc_delete_finalize_array_h (a)
++      type (*), dimension (..), contiguous :: a
++    end subroutine
++
++    subroutine acc_update_device_32_h (a, len)
++      use iso_c_binding, only: c_int32_t
++      !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++      type (*), dimension (*) :: a
++      integer (c_int32_t) len
++    end subroutine
++
++    subroutine acc_update_device_64_h (a, len)
++      use iso_c_binding, only: c_int64_t
++      !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++      type (*), dimension (*) :: a
++      integer (c_int64_t) len
++    end subroutine
++
++    subroutine acc_update_device_array_h (a)
++      type (*), dimension (..), contiguous :: a
++    end subroutine
++
++    subroutine acc_update_self_32_h (a, len)
++      use iso_c_binding, only: c_int32_t
++      !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++      type (*), dimension (*) :: a
++      integer (c_int32_t) len
++    end subroutine
++
++    subroutine acc_update_self_64_h (a, len)
++      use iso_c_binding, only: c_int64_t
++      !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++      type (*), dimension (*) :: a
++      integer (c_int64_t) len
++    end subroutine
++
++    subroutine acc_update_self_array_h (a)
++      type (*), dimension (..), contiguous :: a
++    end subroutine
++
++    function acc_is_present_32_h (a, len)
++      use iso_c_binding, only: c_int32_t
++      logical acc_is_present_32_h
++      !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++      type (*), dimension (*) :: a
++      integer (c_int32_t) len
++    end function
++
++    function acc_is_present_64_h (a, len)
++      use iso_c_binding, only: c_int64_t
++      logical acc_is_present_64_h
++      !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++      type (*), dimension (*) :: a
++      integer (c_int64_t) len
++    end function
++
++    function acc_is_present_array_h (a)
++      logical acc_is_present_array_h
++      type (*), dimension (..), contiguous :: a
++    end function
++
++    subroutine acc_copyin_async_32_h (a, len, async)
++      use iso_c_binding, only: c_int32_t
++      use openacc_kinds2, only: acc_handle_kind
++      !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++      type (*), dimension (*) :: a
++      integer (c_int32_t) len
++      integer (acc_handle_kind) async
++    end subroutine
++
++    subroutine acc_copyin_async_64_h (a, len, async)
++      use iso_c_binding, only: c_int64_t
++      use openacc_kinds2, only: acc_handle_kind
++      !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++      type (*), dimension (*) :: a
++      integer (c_int64_t) len
++      integer (acc_handle_kind) async
++    end subroutine
++
++    subroutine acc_copyin_async_array_h (a, async)
++      use openacc_kinds2, only: acc_handle_kind
++      type (*), dimension (..), contiguous :: a
++      integer (acc_handle_kind) async
++    end subroutine
++
++    subroutine acc_create_async_32_h (a, len, async)
++      use iso_c_binding, only: c_int32_t
++      use openacc_kinds2, only: acc_handle_kind
++      !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++      type (*), dimension (*) :: a
++      integer (c_int32_t) len
++      integer (acc_handle_kind) async
++    end subroutine
++
++    subroutine acc_create_async_64_h (a, len, async)
++      use iso_c_binding, only: c_int64_t
++      use openacc_kinds2, only: acc_handle_kind
++      !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++      type (*), dimension (*) :: a
++      integer (c_int64_t) len
++      integer (acc_handle_kind) async
++    end subroutine
++
++    subroutine acc_create_async_array_h (a, async)
++      use openacc_kinds2, only: acc_handle_kind
++      type (*), dimension (..), contiguous :: a
++      integer (acc_handle_kind) async
++    end subroutine
++
++    subroutine acc_copyout_async_32_h (a, len, async)
++      use iso_c_binding, only: c_int32_t
++      use openacc_kinds2, only: acc_handle_kind
++      !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++      type (*), dimension (*) :: a
++      integer (c_int32_t) len
++      integer (acc_handle_kind) async
++    end subroutine
++
++    subroutine acc_copyout_async_64_h (a, len, async)
++      use iso_c_binding, only: c_int64_t
++      use openacc_kinds2, only: acc_handle_kind
++      !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++      type (*), dimension (*) :: a
++      integer (c_int64_t) len
++      integer (acc_handle_kind) async
++    end subroutine
++
++    subroutine acc_copyout_async_array_h (a, async)
++      use openacc_kinds2, only: acc_handle_kind
++      type (*), dimension (..), contiguous :: a
++      integer (acc_handle_kind) async
++    end subroutine
++
++    subroutine acc_delete_async_32_h (a, len, async)
++      use iso_c_binding, only: c_int32_t
++      use openacc_kinds2, only: acc_handle_kind
++      !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++      type (*), dimension (*) :: a
++      integer (c_int32_t) len
++      integer (acc_handle_kind) async
++    end subroutine
++
++    subroutine acc_delete_async_64_h (a, len, async)
++      use iso_c_binding, only: c_int64_t
++      use openacc_kinds2, only: acc_handle_kind
++      !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++      type (*), dimension (*) :: a
++      integer (c_int64_t) len
++      integer (acc_handle_kind) async
++    end subroutine
++
++    subroutine acc_delete_async_array_h (a, async)
++      use openacc_kinds2, only: acc_handle_kind
++      type (*), dimension (..), contiguous :: a
++      integer (acc_handle_kind) async
++    end subroutine
++
++    subroutine acc_update_device_async_32_h (a, len, async)
++      use iso_c_binding, only: c_int32_t
++      use openacc_kinds2, only: acc_handle_kind
++      !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++      type (*), dimension (*) :: a
++      integer (c_int32_t) len
++      integer (acc_handle_kind) async
++    end subroutine
++
++    subroutine acc_update_device_async_64_h (a, len, async)  ! interface body: c_int64_t len
++      use openacc_kinds2, only: acc_handle_kind
++      use iso_c_binding, only: c_int64_t
++      !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++      type (*), dimension (*) :: a
++      integer (acc_handle_kind) :: async
++      integer (c_int64_t) :: len
++    end subroutine acc_update_device_async_64_h
++
++    subroutine acc_update_device_async_array_h (a, async)  ! interface body: assumed-rank a
++      use openacc_kinds2, only: acc_handle_kind
++      type (*), contiguous, dimension (..) :: a
++      integer (acc_handle_kind) :: async
++    end subroutine acc_update_device_async_array_h
++
++    subroutine acc_update_self_async_32_h (a, len, async)  ! interface body: c_int32_t len
++      use openacc_kinds2, only: acc_handle_kind
++      use iso_c_binding, only: c_int32_t
++      !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++      type (*), dimension (*) :: a
++      integer (acc_handle_kind) :: async
++      integer (c_int32_t) :: len
++    end subroutine acc_update_self_async_32_h
++
++    subroutine acc_update_self_async_64_h (a, len, async)  ! interface body: c_int64_t len
++      use openacc_kinds2, only: acc_handle_kind
++      use iso_c_binding, only: c_int64_t
++      !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++      type (*), dimension (*) :: a
++      integer (acc_handle_kind) :: async
++      integer (c_int64_t) :: len
++    end subroutine acc_update_self_async_64_h
++
++    subroutine acc_update_self_async_array_h (a, async)  ! interface body: assumed-rank a
++      use openacc_kinds2, only: acc_handle_kind
++      type (*), contiguous, dimension (..) :: a
++      integer (acc_handle_kind) :: async
++    end subroutine acc_update_self_async_array_h
++  end interface
++
++  interface  ! bind(C) declarations of the *_l routines called by the *_h wrappers
++    function acc_get_num_devices_l (d) &
++        bind (C, name = "acc_get_num_devices")
++      use iso_c_binding, only: c_int
++      integer (c_int) :: acc_get_num_devices_l
++      integer (c_int), value :: d
++    end function
++
++    subroutine acc_set_device_type_l (d) &
++        bind (C, name = "acc_set_device_type")
++      use iso_c_binding, only: c_int
++      integer (c_int), value :: d
++    end subroutine
++
++    function acc_get_device_type_l () &
++        bind (C, name = "acc_get_device_type")
++      use iso_c_binding, only: c_int
++      integer (c_int) :: acc_get_device_type_l
++    end function
++
++    subroutine acc_set_device_num_l (n, d) &
++        bind (C, name = "acc_set_device_num")
++      use iso_c_binding, only: c_int
++      integer (c_int), value :: n, d
++    end subroutine
++
++    function acc_get_device_num_l (d) &
++        bind (C, name = "acc_get_device_num")
++      use iso_c_binding, only: c_int
++      integer (c_int) :: acc_get_device_num_l
++      integer (c_int), value :: d
++    end function
++
++    function acc_async_test_l (a) &
++        bind (C, name = "acc_async_test")
++      use iso_c_binding, only: c_int
++      integer (c_int) :: acc_async_test_l
++      integer (c_int), value :: a
++    end function
++
++    function acc_async_test_all_l () &
++        bind (C, name = "acc_async_test_all")
++      use iso_c_binding, only: c_int
++      integer (c_int) :: acc_async_test_all_l
++    end function
++
++    subroutine acc_wait_l (a) &
++        bind (C, name = "acc_wait")
++      use iso_c_binding, only: c_int
++      integer (c_int), value :: a
++    end subroutine
++
++    subroutine acc_wait_async_l (a1, a2) &
++        bind (C, name = "acc_wait_async")
++      use iso_c_binding, only: c_int
++      integer (c_int), value :: a1, a2
++    end subroutine
++
++    subroutine acc_wait_all_l () &
++        bind (C, name = "acc_wait_all")
++      ! No arguments and no result, so no iso_c_binding kinds are needed.
++    end subroutine
++
++    subroutine acc_wait_all_async_l (a) &
++        bind (C, name = "acc_wait_all_async")
++      use iso_c_binding, only: c_int
++      integer (c_int), value :: a
++    end subroutine
++
++    subroutine acc_init_l (d) &
++        bind (C, name = "acc_init")
++      use iso_c_binding, only: c_int
++      integer (c_int), value :: d
++    end subroutine
++
++    subroutine acc_shutdown_l (d) &
++        bind (C, name = "acc_shutdown")
++      use iso_c_binding, only: c_int
++      integer (c_int), value :: d
++    end subroutine
++
++    function acc_on_device_l (d) &
++        bind (C, name = "acc_on_device")
++      use iso_c_binding, only: c_int
++      integer (c_int) :: acc_on_device_l
++      integer (c_int), value :: d
++    end function
++
++    subroutine acc_copyin_l (a, len) &
++        bind (C, name = "acc_copyin")
++      use iso_c_binding, only: c_size_t
++      !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++      type (*), dimension (*) :: a
++      integer (c_size_t), value :: len
++    end subroutine
++
++    subroutine acc_present_or_copyin_l (a, len) &
++        bind (C, name = "acc_present_or_copyin")
++      use iso_c_binding, only: c_size_t
++      !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++      type (*), dimension (*) :: a
++      integer (c_size_t), value :: len
++    end subroutine
++
++    subroutine acc_create_l (a, len) &
++        bind (C, name = "acc_create")
++      use iso_c_binding, only: c_size_t
++      !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++      type (*), dimension (*) :: a
++      integer (c_size_t), value :: len
++    end subroutine
++
++    subroutine acc_present_or_create_l (a, len) &
++        bind (C, name = "acc_present_or_create")
++      use iso_c_binding, only: c_size_t
++      !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++      type (*), dimension (*) :: a
++      integer (c_size_t), value :: len
++    end subroutine
++
++    subroutine acc_copyout_l (a, len) &
++        bind (C, name = "acc_copyout")
++      use iso_c_binding, only: c_size_t
++      !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++      type (*), dimension (*) :: a
++      integer (c_size_t), value :: len
++    end subroutine
++
++    subroutine acc_copyout_finalize_l (a, len) &
++        bind (C, name = "acc_copyout_finalize")
++      use iso_c_binding, only: c_size_t
++      !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++      type (*), dimension (*) :: a
++      integer (c_size_t), value :: len
++    end subroutine
++
++    subroutine acc_delete_l (a, len) &
++        bind (C, name = "acc_delete")
++      use iso_c_binding, only: c_size_t
++      !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++      type (*), dimension (*) :: a
++      integer (c_size_t), value :: len
++    end subroutine
++
++    subroutine acc_delete_finalize_l (a, len) &
++        bind (C, name = "acc_delete_finalize")
++      use iso_c_binding, only: c_size_t
++      !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++      type (*), dimension (*) :: a
++      integer (c_size_t), value :: len
++    end subroutine
++
++    subroutine acc_update_device_l (a, len) &
++        bind (C, name = "acc_update_device")
++      use iso_c_binding, only: c_size_t
++      !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++      type (*), dimension (*) :: a
++      integer (c_size_t), value :: len
++    end subroutine
++
++    subroutine acc_update_self_l (a, len) &
++        bind (C, name = "acc_update_self")
++      use iso_c_binding, only: c_size_t
++      !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++      type (*), dimension (*) :: a
++      integer (c_size_t), value :: len
++    end subroutine
++
++    function acc_is_present_l (a, len) &
++        bind (C, name = "acc_is_present")
++      use iso_c_binding, only: c_int, c_size_t
++      !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++      integer (c_int) :: acc_is_present_l  ! c_int, like the other integer-valued bindings here
++      type (*), dimension (*) :: a
++      integer (c_size_t), value :: len
++    end function
++
++    subroutine acc_copyin_async_l (a, len, async) &
++        bind (C, name = "acc_copyin_async")
++      use iso_c_binding, only: c_size_t, c_int
++      !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++      type (*), dimension (*) :: a
++      integer (c_size_t), value :: len
++      integer (c_int), value :: async
++    end subroutine
++
++    subroutine acc_create_async_l (a, len, async) &
++        bind (C, name = "acc_create_async")
++      use iso_c_binding, only: c_size_t, c_int
++      !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++      type (*), dimension (*) :: a
++      integer (c_size_t), value :: len
++      integer (c_int), value :: async
++    end subroutine
++
++    subroutine acc_copyout_async_l (a, len, async) &
++        bind (C, name = "acc_copyout_async")
++      use iso_c_binding, only: c_size_t, c_int
++      !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++      type (*), dimension (*) :: a
++      integer (c_size_t), value :: len
++      integer (c_int), value :: async
++    end subroutine
++
++    subroutine acc_delete_async_l (a, len, async) &
++        bind (C, name = "acc_delete_async")
++      use iso_c_binding, only: c_size_t, c_int
++      !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++      type (*), dimension (*) :: a
++      integer (c_size_t), value :: len
++      integer (c_int), value :: async
++    end subroutine
++
++    subroutine acc_update_device_async_l (a, len, async) &
++        bind (C, name = "acc_update_device_async")
++      use iso_c_binding, only: c_size_t, c_int
++      !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++      type (*), dimension (*) :: a
++      integer (c_size_t), value :: len
++      integer (c_int), value :: async
++    end subroutine
++
++    subroutine acc_update_self_async_l (a, len, async) &
++        bind (C, name = "acc_update_self_async")
++      use iso_c_binding, only: c_size_t, c_int
++      !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++      type (*), dimension (*) :: a
++      integer (c_size_t), value :: len
++      integer (c_int), value :: async
++    end subroutine
++  end interface
++end module
++
++module openacc2  ! generic OpenACC routine names mapped onto the *_h specific procedures
++  use openacc_kinds2
++  use openacc_internal2
++  implicit none
++
++  public :: openacc_version
++
++  public :: acc_get_num_devices, acc_set_device_type, acc_get_device_type
++  public :: acc_set_device_num, acc_get_device_num, acc_async_test
++  public :: acc_async_test_all, acc_wait, acc_async_wait, acc_wait_async
++  public :: acc_wait_all, acc_async_wait_all, acc_wait_all_async
++  public :: acc_init, acc_shutdown, acc_on_device
++  public :: acc_copyin, acc_present_or_copyin, acc_pcopyin, acc_create
++  public :: acc_present_or_create, acc_pcreate, acc_copyout, acc_delete
++  public :: acc_copyout_finalize, acc_delete_finalize  ! generics defined below
++  public :: acc_update_device, acc_update_self, acc_is_present
++  public :: acc_copyin_async, acc_create_async, acc_copyout_async
++  public :: acc_delete_async, acc_update_device_async, acc_update_self_async
++
++  integer, parameter :: openacc_version = 201306
++
++  interface acc_get_num_devices
++    procedure :: acc_get_num_devices_h
++  end interface
++
++  interface acc_set_device_type
++    procedure :: acc_set_device_type_h
++  end interface
++
++  interface acc_get_device_type
++    procedure :: acc_get_device_type_h
++  end interface
++
++  interface acc_set_device_num
++    procedure :: acc_set_device_num_h
++  end interface
++
++  interface acc_get_device_num
++    procedure :: acc_get_device_num_h
++  end interface
++
++  interface acc_async_test
++    procedure :: acc_async_test_h
++  end interface
++
++  interface acc_async_test_all
++    procedure :: acc_async_test_all_h
++  end interface
++
++  interface acc_wait
++    procedure :: acc_wait_h
++  end interface
++
++  ! acc_async_wait is an OpenACC 1.0 compatibility name for acc_wait.
++  interface acc_async_wait
++    procedure :: acc_wait_h
++  end interface
++
++  interface acc_wait_async
++    procedure :: acc_wait_async_h
++  end interface
++
++  interface acc_wait_all
++    procedure :: acc_wait_all_h
++  end interface
++
++  ! acc_async_wait_all is an OpenACC 1.0 compatibility name for acc_wait_all.
++  interface acc_async_wait_all
++    procedure :: acc_wait_all_h
++  end interface
++
++  interface acc_wait_all_async
++    procedure :: acc_wait_all_async_h
++  end interface
++
++  interface acc_init
++    procedure :: acc_init_h
++  end interface
++
++  interface acc_shutdown
++    procedure :: acc_shutdown_h
++  end interface
++
++  interface acc_on_device
++    procedure :: acc_on_device_h
++  end interface
++
++  ! acc_malloc: Only available in C/C++
++  ! acc_free: Only available in C/C++
++
++  ! As a vendor extension, the following code supports both 32-bit and 64-bit
++  ! arguments for "size"; the OpenACC standard only permits default-kind
++  ! integers, which are of kind 4 (i.e. 32 bits).
++  ! Additionally, the two-argument version also takes arrays as argument,
++  ! and the one-argument version also scalars.  Note that the code assumes
++  ! that the arrays are contiguous.
++
++  interface acc_copyin
++    procedure :: acc_copyin_32_h
++    procedure :: acc_copyin_64_h
++    procedure :: acc_copyin_array_h
++  end interface
++
++  interface acc_present_or_copyin
++    procedure :: acc_present_or_copyin_32_h
++    procedure :: acc_present_or_copyin_64_h
++    procedure :: acc_present_or_copyin_array_h
++  end interface
++
++  interface acc_pcopyin  ! same specifics as acc_present_or_copyin
++    procedure :: acc_present_or_copyin_32_h
++    procedure :: acc_present_or_copyin_64_h
++    procedure :: acc_present_or_copyin_array_h
++  end interface
++
++  interface acc_create
++    procedure :: acc_create_32_h
++    procedure :: acc_create_64_h
++    procedure :: acc_create_array_h
++  end interface
++
++  interface acc_present_or_create
++    procedure :: acc_present_or_create_32_h
++    procedure :: acc_present_or_create_64_h
++    procedure :: acc_present_or_create_array_h
++  end interface
++
++  interface acc_pcreate  ! same specifics as acc_present_or_create
++    procedure :: acc_present_or_create_32_h
++    procedure :: acc_present_or_create_64_h
++    procedure :: acc_present_or_create_array_h
++  end interface
++
++  interface acc_copyout
++    procedure :: acc_copyout_32_h
++    procedure :: acc_copyout_64_h
++    procedure :: acc_copyout_array_h
++  end interface
++
++  interface acc_copyout_finalize
++    procedure :: acc_copyout_finalize_32_h
++    procedure :: acc_copyout_finalize_64_h
++    procedure :: acc_copyout_finalize_array_h
++  end interface
++
++  interface acc_delete
++    procedure :: acc_delete_32_h
++    procedure :: acc_delete_64_h
++    procedure :: acc_delete_array_h
++  end interface
++
++  interface acc_delete_finalize
++    procedure :: acc_delete_finalize_32_h
++    procedure :: acc_delete_finalize_64_h
++    procedure :: acc_delete_finalize_array_h
++  end interface
++
++  interface acc_update_device
++    procedure :: acc_update_device_32_h
++    procedure :: acc_update_device_64_h
++    procedure :: acc_update_device_array_h
++  end interface
++
++  interface acc_update_self
++    procedure :: acc_update_self_32_h
++    procedure :: acc_update_self_64_h
++    procedure :: acc_update_self_array_h
++  end interface
++
++  ! acc_map_data: Only available in C/C++
++  ! acc_unmap_data: Only available in C/C++
++  ! acc_deviceptr: Only available in C/C++
++  ! acc_hostptr: Only available in C/C++
++
++  interface acc_is_present
++    procedure :: acc_is_present_32_h
++    procedure :: acc_is_present_64_h
++    procedure :: acc_is_present_array_h
++  end interface
++
++  ! acc_memcpy_to_device: Only available in C/C++
++  ! acc_memcpy_from_device: Only available in C/C++
++
++  interface acc_copyin_async
++    procedure :: acc_copyin_async_32_h
++    procedure :: acc_copyin_async_64_h
++    procedure :: acc_copyin_async_array_h
++  end interface
++
++  interface acc_create_async
++    procedure :: acc_create_async_32_h
++    procedure :: acc_create_async_64_h
++    procedure :: acc_create_async_array_h
++  end interface
++
++  interface acc_copyout_async
++    procedure :: acc_copyout_async_32_h
++    procedure :: acc_copyout_async_64_h
++    procedure :: acc_copyout_async_array_h
++  end interface
++
++  interface acc_delete_async
++    procedure :: acc_delete_async_32_h
++    procedure :: acc_delete_async_64_h
++    procedure :: acc_delete_async_array_h
++  end interface
++
++  interface acc_update_device_async
++    procedure :: acc_update_device_async_32_h
++    procedure :: acc_update_device_async_64_h
++    procedure :: acc_update_device_async_array_h
++  end interface
++
++  interface acc_update_self_async
++    procedure :: acc_update_self_async_32_h
++    procedure :: acc_update_self_async_64_h
++    procedure :: acc_update_self_async_array_h
++  end interface
++
++end module
++
++function acc_get_num_devices_h (d)  ! forwards to acc_get_num_devices_l
++  use openacc_kinds2
++  use openacc_internal2, only: acc_get_num_devices_l
++  integer (acc_device_kind) :: d
++  integer :: acc_get_num_devices_h
++  acc_get_num_devices_h = acc_get_num_devices_l (d)  ! result returned as default integer
++end function acc_get_num_devices_h
++
++subroutine acc_set_device_type_h (d)  ! forwards to acc_set_device_type_l
++  use openacc_kinds2
++  use openacc_internal2, only: acc_set_device_type_l
++  integer (acc_device_kind) :: d
++  call acc_set_device_type_l (d)
++end subroutine acc_set_device_type_h
++
++function acc_get_device_type_h ()  ! forwards to acc_get_device_type_l
++  use openacc_kinds2
++  use openacc_internal2, only: acc_get_device_type_l
++  integer (acc_device_kind) :: acc_get_device_type_h
++  acc_get_device_type_h = acc_get_device_type_l ()  ! result stored with acc_device_kind
++end function acc_get_device_type_h
++
++subroutine acc_set_device_num_h (n, d)  ! forwards to acc_set_device_num_l
++  use openacc_kinds2
++  use openacc_internal2, only: acc_set_device_num_l
++  integer (acc_device_kind) :: d
++  integer :: n
++  call acc_set_device_num_l (n, d)
++end subroutine acc_set_device_num_h
++
++function acc_get_device_num_h (d)  ! forwards to acc_get_device_num_l
++  use openacc_kinds2
++  use openacc_internal2, only: acc_get_device_num_l
++  integer (acc_device_kind) :: d
++  integer :: acc_get_device_num_h
++  acc_get_device_num_h = acc_get_device_num_l (d)  ! result returned as default integer
++end function acc_get_device_num_h
++
++function acc_async_test_h (a)  ! logical view of the integer result of acc_async_test_l
++  use openacc_internal2, only: acc_async_test_l
++  logical :: acc_async_test_h
++  integer :: a
++  if (acc_async_test_l (a) /= 0) then  ! C truth value: any nonzero result is true
++    acc_async_test_h = .TRUE.
++  else
++    acc_async_test_h = .FALSE.
++  end if
++end function acc_async_test_h
++
++function acc_async_test_all_h ()  ! logical view of the integer result of acc_async_test_all_l
++  use openacc_internal2, only: acc_async_test_all_l
++  logical :: acc_async_test_all_h
++  if (acc_async_test_all_l () /= 0) then  ! C truth value: any nonzero result is true
++    acc_async_test_all_h = .TRUE.
++  else
++    acc_async_test_all_h = .FALSE.
++  end if
++end function acc_async_test_all_h
++
++subroutine acc_wait_h (a)  ! forwards to acc_wait_l
++  use openacc_internal2, only: acc_wait_l
++  integer :: a
++  call acc_wait_l (a)
++end subroutine acc_wait_h
++
++subroutine acc_wait_async_h (a1, a2)  ! forwards to acc_wait_async_l
++  use openacc_internal2, only: acc_wait_async_l
++  integer :: a1, a2
++  call acc_wait_async_l (a1, a2)
++end subroutine acc_wait_async_h
++
++subroutine acc_wait_all_h ()  ! forwards to acc_wait_all_l
++  use openacc_internal2, only: acc_wait_all_l
++  call acc_wait_all_l ()
++end subroutine acc_wait_all_h
++
++subroutine acc_wait_all_async_h (a)  ! forwards to acc_wait_all_async_l
++  use openacc_internal2, only: acc_wait_all_async_l
++  integer :: a
++  call acc_wait_all_async_l (a)
++end subroutine acc_wait_all_async_h
++
++subroutine acc_init_h (d)  ! forwards to acc_init_l
++  use openacc_kinds2
++  use openacc_internal2, only: acc_init_l
++  integer (acc_device_kind) :: d
++  call acc_init_l (d)
++end subroutine acc_init_h
++
++subroutine acc_shutdown_h (d)  ! forwards to acc_shutdown_l
++  use openacc_kinds2
++  use openacc_internal2, only: acc_shutdown_l
++  integer (acc_device_kind) :: d
++  call acc_shutdown_l (d)
++end subroutine acc_shutdown_h
++
++function acc_on_device_h (d)  ! logical view of the integer result of acc_on_device_l
++  use openacc_internal2, only: acc_on_device_l
++  use openacc_kinds2
++  integer (acc_device_kind) :: d
++  logical :: acc_on_device_h
++  if (acc_on_device_l (d) /= 0) then  ! C truth value: any nonzero result is true
++    acc_on_device_h = .TRUE.
++  else
++    acc_on_device_h = .FALSE.
++  end if
++end function acc_on_device_h
++
++subroutine acc_copyin_32_h (a, len)  ! acc_copyin specific taking a c_int32_t len
++  use openacc_internal2, only: acc_copyin_l
++  use iso_c_binding, only: c_size_t, c_int32_t
++  !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++  type (*), dimension (*) :: a
++  integer (c_int32_t) :: len
++  call acc_copyin_l (a, int (len, c_size_t))  ! len converted to c_size_t
++end subroutine acc_copyin_32_h
++
++subroutine acc_copyin_64_h (a, len)  ! acc_copyin specific taking a c_int64_t len
++  use openacc_internal2, only: acc_copyin_l
++  use iso_c_binding, only: c_size_t, c_int64_t
++  !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++  type (*), dimension (*) :: a
++  integer (c_int64_t) :: len
++  call acc_copyin_l (a, int (len, c_size_t))  ! len converted to c_size_t
++end subroutine acc_copyin_64_h
++
++subroutine acc_copyin_array_h (a)  ! acc_copyin specific without an explicit len
++  use openacc_internal2, only: acc_copyin_l
++  type (*), contiguous, dimension (..) :: a
++  call acc_copyin_l (a, sizeof (a))  ! length is sizeof of the argument
++end subroutine acc_copyin_array_h
++
++subroutine acc_present_or_copyin_32_h (a, len)  ! acc_present_or_copyin specific, c_int32_t len
++  use openacc_internal2, only: acc_present_or_copyin_l
++  use iso_c_binding, only: c_size_t, c_int32_t
++  !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++  type (*), dimension (*) :: a
++  integer (c_int32_t) :: len
++  call acc_present_or_copyin_l (a, int (len, c_size_t))  ! len converted to c_size_t
++end subroutine acc_present_or_copyin_32_h
++
++subroutine acc_present_or_copyin_64_h (a, len)  ! acc_present_or_copyin specific, c_int64_t len
++  use openacc_internal2, only: acc_present_or_copyin_l
++  use iso_c_binding, only: c_size_t, c_int64_t
++  !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++  type (*), dimension (*) :: a
++  integer (c_int64_t) :: len
++  call acc_present_or_copyin_l (a, int (len, c_size_t))  ! len converted to c_size_t
++end subroutine acc_present_or_copyin_64_h
++
++subroutine acc_present_or_copyin_array_h (a)  ! acc_present_or_copyin specific, no explicit len
++  use openacc_internal2, only: acc_present_or_copyin_l
++  type (*), contiguous, dimension (..) :: a
++  call acc_present_or_copyin_l (a, sizeof (a))  ! length is sizeof of the argument
++end subroutine acc_present_or_copyin_array_h
++
++subroutine acc_create_32_h (a, len)  ! acc_create specific taking a c_int32_t len
++  use openacc_internal2, only: acc_create_l
++  use iso_c_binding, only: c_size_t, c_int32_t
++  !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++  type (*), dimension (*) :: a
++  integer (c_int32_t) :: len
++  call acc_create_l (a, int (len, c_size_t))  ! len converted to c_size_t
++end subroutine acc_create_32_h
++
++subroutine acc_create_64_h (a, len)  ! acc_create specific taking a c_int64_t len
++  use openacc_internal2, only: acc_create_l
++  use iso_c_binding, only: c_size_t, c_int64_t
++  !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++  type (*), dimension (*) :: a
++  integer (c_int64_t) :: len
++  call acc_create_l (a, int (len, c_size_t))  ! len converted to c_size_t
++end subroutine acc_create_64_h
++
++subroutine acc_create_array_h (a)  ! acc_create specific without an explicit len
++  use openacc_internal2, only: acc_create_l
++  type (*), contiguous, dimension (..) :: a
++  call acc_create_l (a, sizeof (a))  ! length is sizeof of the argument
++end subroutine acc_create_array_h
++
++subroutine acc_present_or_create_32_h (a, len)  ! acc_present_or_create specific, c_int32_t len
++  use openacc_internal2, only: acc_present_or_create_l
++  use iso_c_binding, only: c_size_t, c_int32_t
++  !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++  type (*), dimension (*) :: a
++  integer (c_int32_t) :: len
++  call acc_present_or_create_l (a, int (len, c_size_t))  ! len converted to c_size_t
++end subroutine acc_present_or_create_32_h
++
++subroutine acc_present_or_create_64_h (a, len)  ! acc_present_or_create specific, c_int64_t len
++  use openacc_internal2, only: acc_present_or_create_l
++  use iso_c_binding, only: c_size_t, c_int64_t
++  !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++  type (*), dimension (*) :: a
++  integer (c_int64_t) :: len
++  call acc_present_or_create_l (a, int (len, c_size_t))  ! len converted to c_size_t
++end subroutine acc_present_or_create_64_h
++
++subroutine acc_present_or_create_array_h (a)  ! acc_present_or_create specific, no explicit len
++  use openacc_internal2, only: acc_present_or_create_l
++  type (*), contiguous, dimension (..) :: a
++  call acc_present_or_create_l (a, sizeof (a))  ! length is sizeof of the argument
++end subroutine acc_present_or_create_array_h
++
++subroutine acc_copyout_32_h (a, len)  ! acc_copyout specific taking a c_int32_t len
++  use openacc_internal2, only: acc_copyout_l
++  use iso_c_binding, only: c_size_t, c_int32_t
++  !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++  type (*), dimension (*) :: a
++  integer (c_int32_t) :: len
++  call acc_copyout_l (a, int (len, c_size_t))  ! len converted to c_size_t
++end subroutine acc_copyout_32_h
++
++subroutine acc_copyout_64_h (a, len)  ! acc_copyout specific taking a c_int64_t len
++  use openacc_internal2, only: acc_copyout_l
++  use iso_c_binding, only: c_size_t, c_int64_t
++  !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++  type (*), dimension (*) :: a
++  integer (c_int64_t) :: len
++  call acc_copyout_l (a, int (len, c_size_t))  ! len converted to c_size_t
++end subroutine acc_copyout_64_h
++
++subroutine acc_copyout_array_h (a)  ! acc_copyout specific without an explicit len
++  use openacc_internal2, only: acc_copyout_l
++  type (*), contiguous, dimension (..) :: a
++  call acc_copyout_l (a, sizeof (a))  ! length is sizeof of the argument
++end subroutine acc_copyout_array_h
++
++subroutine acc_copyout_finalize_32_h (a, len)  ! acc_copyout_finalize specific, c_int32_t len
++  use openacc_internal2, only: acc_copyout_finalize_l
++  use iso_c_binding, only: c_size_t, c_int32_t
++  !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++  type (*), dimension (*) :: a
++  integer (c_int32_t) :: len
++  call acc_copyout_finalize_l (a, int (len, c_size_t))  ! len converted to c_size_t
++end subroutine acc_copyout_finalize_32_h
++
++subroutine acc_copyout_finalize_64_h (a, len)  ! acc_copyout_finalize specific, c_int64_t len
++  use openacc_internal2, only: acc_copyout_finalize_l
++  use iso_c_binding, only: c_size_t, c_int64_t
++  !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++  type (*), dimension (*) :: a
++  integer (c_int64_t) :: len
++  call acc_copyout_finalize_l (a, int (len, c_size_t))  ! len converted to c_size_t
++end subroutine acc_copyout_finalize_64_h
++
++subroutine acc_copyout_finalize_array_h (a)  ! acc_copyout_finalize specific, no explicit len
++  use openacc_internal2, only: acc_copyout_finalize_l
++  type (*), contiguous, dimension (..) :: a
++  call acc_copyout_finalize_l (a, sizeof (a))  ! length is sizeof of the argument
++end subroutine acc_copyout_finalize_array_h
++
++subroutine acc_delete_32_h (a, len)  ! acc_delete specific taking a c_int32_t len
++  use openacc_internal2, only: acc_delete_l
++  use iso_c_binding, only: c_size_t, c_int32_t
++  !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++  type (*), dimension (*) :: a
++  integer (c_int32_t) :: len
++  call acc_delete_l (a, int (len, c_size_t))  ! len converted to c_size_t
++end subroutine acc_delete_32_h
++
++subroutine acc_delete_64_h (a, len)  ! acc_delete specific taking a c_int64_t len
++  use openacc_internal2, only: acc_delete_l
++  use iso_c_binding, only: c_size_t, c_int64_t
++  !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++  type (*), dimension (*) :: a
++  integer (c_int64_t) :: len
++  call acc_delete_l (a, int (len, c_size_t))  ! len converted to c_size_t
++end subroutine acc_delete_64_h
++
++subroutine acc_delete_array_h (a)  ! acc_delete specific without an explicit len
++  use openacc_internal2, only: acc_delete_l
++  type (*), contiguous, dimension (..) :: a
++  call acc_delete_l (a, sizeof (a))  ! length is sizeof of the argument
++end subroutine acc_delete_array_h
++
++subroutine acc_delete_finalize_32_h (a, len)  ! acc_delete_finalize specific, c_int32_t len
++  use openacc_internal2, only: acc_delete_finalize_l
++  use iso_c_binding, only: c_size_t, c_int32_t
++  !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++  type (*), dimension (*) :: a
++  integer (c_int32_t) :: len
++  call acc_delete_finalize_l (a, int (len, c_size_t))  ! len converted to c_size_t
++end subroutine acc_delete_finalize_32_h
++
++subroutine acc_delete_finalize_64_h (a, len)  ! acc_delete_finalize specific, c_int64_t len
++  use openacc_internal2, only: acc_delete_finalize_l
++  use iso_c_binding, only: c_size_t, c_int64_t
++  !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++  type (*), dimension (*) :: a
++  integer (c_int64_t) :: len
++  call acc_delete_finalize_l (a, int (len, c_size_t))  ! len converted to c_size_t
++end subroutine acc_delete_finalize_64_h
++
++subroutine acc_delete_finalize_array_h (a)  ! acc_delete_finalize specific, no explicit len
++  use openacc_internal2, only: acc_delete_finalize_l
++  type (*), contiguous, dimension (..) :: a
++  call acc_delete_finalize_l (a, sizeof (a))  ! length is sizeof of the argument
++end subroutine acc_delete_finalize_array_h
++
++subroutine acc_update_device_32_h (a, len)  ! acc_update_device specific, c_int32_t len
++  use openacc_internal2, only: acc_update_device_l
++  use iso_c_binding, only: c_size_t, c_int32_t
++  !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++  type (*), dimension (*) :: a
++  integer (c_int32_t) :: len
++  call acc_update_device_l (a, int (len, c_size_t))  ! len converted to c_size_t
++end subroutine acc_update_device_32_h
++
++subroutine acc_update_device_64_h (a, len)  ! acc_update_device specific, c_int64_t len
++  use openacc_internal2, only: acc_update_device_l
++  use iso_c_binding, only: c_size_t, c_int64_t
++  !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++  type (*), dimension (*) :: a
++  integer (c_int64_t) :: len
++  call acc_update_device_l (a, int (len, c_size_t))  ! len converted to c_size_t
++end subroutine acc_update_device_64_h
++
++subroutine acc_update_device_array_h (a)  ! acc_update_device specific, no explicit len
++  use openacc_internal2, only: acc_update_device_l
++  type (*), contiguous, dimension (..) :: a
++  call acc_update_device_l (a, sizeof (a))  ! length is sizeof of the argument
++end subroutine acc_update_device_array_h
++
++subroutine acc_update_self_32_h (a, len)  ! acc_update_self specific, c_int32_t len
++  use openacc_internal2, only: acc_update_self_l
++  use iso_c_binding, only: c_size_t, c_int32_t
++  !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++  type (*), dimension (*) :: a
++  integer (c_int32_t) :: len
++  call acc_update_self_l (a, int (len, c_size_t))  ! len converted to c_size_t
++end subroutine acc_update_self_32_h
++
++subroutine acc_update_self_64_h (a, len)  ! acc_update_self specific, c_int64_t len
++  use openacc_internal2, only: acc_update_self_l
++  use iso_c_binding, only: c_size_t, c_int64_t
++  !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++  type (*), dimension (*) :: a
++  integer (c_int64_t) :: len
++  call acc_update_self_l (a, int (len, c_size_t))  ! len converted to c_size_t
++end subroutine acc_update_self_64_h
++
++subroutine acc_update_self_array_h (a)  ! acc_update_self specific, no explicit len
++  use openacc_internal2, only: acc_update_self_l
++  type (*), contiguous, dimension (..) :: a
++  call acc_update_self_l (a, sizeof (a))  ! length is sizeof of the argument
++end subroutine acc_update_self_array_h
++
++function acc_is_present_32_h (a, len)  ! acc_is_present specific taking a c_int32_t len
++  use iso_c_binding, only: c_int32_t, c_size_t
++  use openacc_internal2, only: acc_is_present_l
++  logical :: acc_is_present_32_h
++  !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++  type (*), dimension (*) :: a
++  integer (c_int32_t) :: len
++  if (acc_is_present_l (a, int (len, kind = c_size_t)) /= 0) then  ! any nonzero C result is true
++    acc_is_present_32_h = .TRUE.
++  else
++    acc_is_present_32_h = .FALSE.
++  end if
++end function acc_is_present_32_h
++
++function acc_is_present_64_h (a, len)  ! acc_is_present specific taking a c_int64_t len
++  use iso_c_binding, only: c_int64_t, c_size_t
++  use openacc_internal2, only: acc_is_present_l
++  logical :: acc_is_present_64_h
++  !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++  type (*), dimension (*) :: a
++  integer (c_int64_t) :: len
++  if (acc_is_present_l (a, int (len, kind = c_size_t)) /= 0) then  ! any nonzero C result is true
++    acc_is_present_64_h = .TRUE.
++  else
++    acc_is_present_64_h = .FALSE.
++  end if
++end function acc_is_present_64_h
++
++function acc_is_present_array_h (a)  ! acc_is_present specific without an explicit len
++  use openacc_internal2, only: acc_is_present_l
++  logical :: acc_is_present_array_h
++  type (*), dimension (..), contiguous :: a
++  acc_is_present_array_h = acc_is_present_l (a, sizeof (a)) /= 0  ! any nonzero C result is true
++end function acc_is_present_array_h
++
++subroutine acc_copyin_async_32_h (a, len, async)  ! acc_copyin_async specific, c_int32_t len
++  use openacc_kinds2, only: acc_handle_kind
++  use openacc_internal2, only: acc_copyin_async_l
++  use iso_c_binding, only: c_int, c_size_t, c_int32_t
++  !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++  type (*), dimension (*) :: a
++  integer (acc_handle_kind) :: async
++  integer (c_int32_t) :: len
++  call acc_copyin_async_l (a, int (len, c_size_t), int (async, c_int))  ! kinds converted for C
++end subroutine acc_copyin_async_32_h
++
++subroutine acc_copyin_async_64_h (a, len, async)  ! acc_copyin_async specific, c_int64_t len
++  use openacc_kinds2, only: acc_handle_kind
++  use openacc_internal2, only: acc_copyin_async_l
++  use iso_c_binding, only: c_int, c_size_t, c_int64_t
++  !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++  type (*), dimension (*) :: a
++  integer (acc_handle_kind) :: async
++  integer (c_int64_t) :: len
++  call acc_copyin_async_l (a, int (len, c_size_t), int (async, c_int))  ! kinds converted for C
++end subroutine acc_copyin_async_64_h
++
++subroutine acc_copyin_async_array_h (a, async)  ! acc_copyin_async specific, no explicit len
++  use openacc_kinds2, only: acc_handle_kind
++  use openacc_internal2, only: acc_copyin_async_l
++  use iso_c_binding, only: c_int
++  integer (acc_handle_kind) :: async
++  type (*), contiguous, dimension (..) :: a
++  call acc_copyin_async_l (a, sizeof (a), int (async, c_int))  ! length is sizeof of the argument
++end subroutine acc_copyin_async_array_h
++
++subroutine acc_create_async_32_h (a, len, async)  ! acc_create_async specific, c_int32_t len
++  use openacc_kinds2, only: acc_handle_kind
++  use openacc_internal2, only: acc_create_async_l
++  use iso_c_binding, only: c_int, c_size_t, c_int32_t
++  !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++  type (*), dimension (*) :: a
++  integer (acc_handle_kind) :: async
++  integer (c_int32_t) :: len
++  call acc_create_async_l (a, int (len, c_size_t), int (async, c_int))  ! kinds converted for C
++end subroutine acc_create_async_32_h
++
++subroutine acc_create_async_64_h (a, len, async)  ! acc_create_async specific, c_int64_t len
++  use openacc_kinds2, only: acc_handle_kind
++  use openacc_internal2, only: acc_create_async_l
++  use iso_c_binding, only: c_int, c_size_t, c_int64_t
++  !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++  type (*), dimension (*) :: a
++  integer (acc_handle_kind) :: async
++  integer (c_int64_t) :: len
++  call acc_create_async_l (a, int (len, c_size_t), int (async, c_int))  ! kinds converted for C
++end subroutine acc_create_async_64_h
++
++subroutine acc_create_async_array_h (a, async)  ! acc_create_async specific, no explicit len
++  use openacc_kinds2, only: acc_handle_kind
++  use openacc_internal2, only: acc_create_async_l
++  use iso_c_binding, only: c_int
++  integer (acc_handle_kind) :: async
++  type (*), contiguous, dimension (..) :: a
++  call acc_create_async_l (a, sizeof (a), int (async, c_int))  ! length is sizeof of the argument
++end subroutine acc_create_async_array_h
++
++subroutine acc_copyout_async_32_h (a, len, async)  ! acc_copyout_async specific, c_int32_t len
++  use openacc_kinds2, only: acc_handle_kind
++  use openacc_internal2, only: acc_copyout_async_l
++  use iso_c_binding, only: c_int, c_size_t, c_int32_t
++  !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++  type (*), dimension (*) :: a
++  integer (acc_handle_kind) :: async
++  integer (c_int32_t) :: len
++  call acc_copyout_async_l (a, int (len, c_size_t), int (async, c_int))  ! kinds converted for C
++end subroutine acc_copyout_async_32_h
++
++subroutine acc_copyout_async_64_h (a, len, async)  ! acc_copyout_async specific, c_int64_t len
++  use openacc_kinds2, only: acc_handle_kind
++  use openacc_internal2, only: acc_copyout_async_l
++  use iso_c_binding, only: c_int, c_size_t, c_int64_t
++  !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++  type (*), dimension (*) :: a
++  integer (acc_handle_kind) :: async
++  integer (c_int64_t) :: len
++  call acc_copyout_async_l (a, int (len, c_size_t), int (async, c_int))  ! kinds converted for C
++end subroutine acc_copyout_async_64_h
++
++subroutine acc_copyout_async_array_h (a, async)  ! acc_copyout_async specific, no explicit len
++  use openacc_kinds2, only: acc_handle_kind
++  use openacc_internal2, only: acc_copyout_async_l
++  use iso_c_binding, only: c_int
++  integer (acc_handle_kind) :: async
++  type (*), contiguous, dimension (..) :: a
++  call acc_copyout_async_l (a, sizeof (a), int (async, c_int))  ! length is sizeof of the argument
++end subroutine acc_copyout_async_array_h
++
++subroutine acc_delete_async_32_h (a, len, async)  ! Wrapper: c_int32_t LEN, forwards to acc_delete_async_l
++  use iso_c_binding, only: c_int32_t, c_size_t, c_int
++  use openacc_internal2, only: acc_delete_async_l
++  use openacc_kinds2, only: acc_handle_kind
++  !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++  type (*), dimension (*) :: a
++  integer (c_int32_t) len
++  integer (acc_handle_kind) async
++  call acc_delete_async_l (a, int (len, kind = c_size_t), int (async, kind = c_int))  ! convert to C kinds
++end subroutine
++
++subroutine acc_delete_async_64_h (a, len, async)  ! Wrapper: c_int64_t LEN, forwards to acc_delete_async_l
++  use iso_c_binding, only: c_int64_t, c_size_t, c_int
++  use openacc_internal2, only: acc_delete_async_l
++  use openacc_kinds2, only: acc_handle_kind
++  !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++  type (*), dimension (*) :: a
++  integer (c_int64_t) len
++  integer (acc_handle_kind) async
++  call acc_delete_async_l (a, int (len, kind = c_size_t), int (async, kind = c_int))  ! convert to C kinds
++end subroutine
++
++subroutine acc_delete_async_array_h (a, async)  ! Wrapper: no LEN argument, forwards to acc_delete_async_l
++  use iso_c_binding, only: c_int
++  use openacc_internal2, only: acc_delete_async_l
++  use openacc_kinds2, only: acc_handle_kind
++  type (*), dimension (..), contiguous :: a
++  integer (acc_handle_kind) async
++  call acc_delete_async_l (a, sizeof (a), int (async, kind = c_int))  ! length is sizeof (a)
++end subroutine
++
++subroutine acc_update_device_async_32_h (a, len, async)  ! Wrapper: c_int32_t LEN, forwards to acc_update_device_async_l
++  use iso_c_binding, only: c_int32_t, c_size_t, c_int
++  use openacc_internal2, only: acc_update_device_async_l
++  use openacc_kinds2, only: acc_handle_kind
++  !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++  type (*), dimension (*) :: a
++  integer (c_int32_t) len
++  integer (acc_handle_kind) async
++  call acc_update_device_async_l (a, int (len, kind = c_size_t), int (async, kind = c_int))  ! convert to C kinds
++end subroutine
++
++subroutine acc_update_device_async_64_h (a, len, async)  ! Wrapper: c_int64_t LEN, forwards to acc_update_device_async_l
++  use iso_c_binding, only: c_int64_t, c_size_t, c_int
++  use openacc_internal2, only: acc_update_device_async_l
++  use openacc_kinds2, only: acc_handle_kind
++  !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++  type (*), dimension (*) :: a
++  integer (c_int64_t) len
++  integer (acc_handle_kind) async
++  call acc_update_device_async_l (a, int (len, kind = c_size_t), int (async, kind = c_int))  ! convert to C kinds
++end subroutine
++
++subroutine acc_update_device_async_array_h (a, async)  ! Wrapper: no LEN argument, forwards to acc_update_device_async_l
++  use iso_c_binding, only: c_int
++  use openacc_internal2, only: acc_update_device_async_l
++  use openacc_kinds2, only: acc_handle_kind
++  type (*), dimension (..), contiguous :: a
++  integer (acc_handle_kind) async
++  call acc_update_device_async_l (a, sizeof (a), int (async, kind = c_int))  ! length is sizeof (a)
++end subroutine
++
++subroutine acc_update_self_async_32_h (a, len, async)  ! Wrapper: c_int32_t LEN, forwards to acc_update_self_async_l
++  use iso_c_binding, only: c_int32_t, c_size_t, c_int
++  use openacc_internal2, only: acc_update_self_async_l
++  use openacc_kinds2, only: acc_handle_kind
++  !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++  type (*), dimension (*) :: a
++  integer (c_int32_t) len
++  integer (acc_handle_kind) async
++  call acc_update_self_async_l (a, int (len, kind = c_size_t), int (async, kind = c_int))  ! convert to C kinds
++end subroutine
++
++subroutine acc_update_self_async_64_h (a, len, async)  ! Wrapper: c_int64_t LEN, forwards to acc_update_self_async_l
++  use iso_c_binding, only: c_int64_t, c_size_t, c_int
++  use openacc_internal2, only: acc_update_self_async_l
++  use openacc_kinds2, only: acc_handle_kind
++  !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++  type (*), dimension (*) :: a
++  integer (c_int64_t) len
++  integer (acc_handle_kind) async
++  call acc_update_self_async_l (a, int (len, kind = c_size_t), int (async, kind = c_int))  ! convert to C kinds
++end subroutine
++
++subroutine acc_update_self_async_array_h (a, async)  ! Wrapper: no LEN argument, forwards to acc_update_self_async_l
++  use iso_c_binding, only: c_int
++  use openacc_internal2, only: acc_update_self_async_l
++  use openacc_kinds2, only: acc_handle_kind
++  type (*), dimension (..), contiguous :: a
++  integer (acc_handle_kind) async
++  call acc_update_self_async_l (a, sizeof (a), int (async, kind = c_int))  ! length is sizeof (a)
++end subroutine
+--- libgomp/taskloop.c.jj	2018-04-25 09:40:31.913655581 +0200
++++ libgomp/taskloop.c	2019-05-07 18:46:36.547109400 +0200
+@@ -149,11 +149,28 @@ GOMP_taskloop (void (*fn) (void *), void
+ 
+   if (flags & GOMP_TASK_FLAG_NOGROUP)
+     {
+-      if (thr->task && thr->task->taskgroup && thr->task->taskgroup->cancelled)
+-	return;
++      if (__builtin_expect (gomp_cancel_var, 0)
++	  && thr->task
++	  && thr->task->taskgroup)
++	{
++	  if (thr->task->taskgroup->cancelled)
++	    return;
++	  if (thr->task->taskgroup->workshare
++	      && thr->task->taskgroup->prev
++	      && thr->task->taskgroup->prev->cancelled)
++	    return;
++	}
+     }
+   else
+-    ialias_call (GOMP_taskgroup_start) ();
++    {
++      ialias_call (GOMP_taskgroup_start) ();
++      if (flags & GOMP_TASK_FLAG_REDUCTION)
++	{
++	  struct gomp_data_head { TYPE t1, t2; uintptr_t *ptr; };
++	  uintptr_t *ptr = ((struct gomp_data_head *) data)->ptr;
++	  ialias_call (GOMP_taskgroup_reduction_register) (ptr);
++	}
++    }
+ 
+   if (priority > gomp_max_task_priority_var)
+     priority = gomp_max_task_priority_var;
+@@ -284,19 +301,31 @@ GOMP_taskloop (void (*fn) (void *), void
+       gomp_mutex_lock (&team->task_lock);
+       /* If parallel or taskgroup has been cancelled, don't start new
+ 	 tasks.  */
+-      if (__builtin_expect ((gomp_team_barrier_cancelled (&team->barrier)
+-			     || (taskgroup && taskgroup->cancelled))
+-			    && cpyfn == NULL, 0))
++      if (__builtin_expect (gomp_cancel_var, 0)
++	  && cpyfn == NULL)
+ 	{
+-	  gomp_mutex_unlock (&team->task_lock);
+-	  for (i = 0; i < num_tasks; i++)
++	  if (gomp_team_barrier_cancelled (&team->barrier))
++	    {
++	    do_cancel:
++	      gomp_mutex_unlock (&team->task_lock);
++	      for (i = 0; i < num_tasks; i++)
++		{
++		  gomp_finish_task (tasks[i]);
++		  free (tasks[i]);
++		}
++	      if ((flags & GOMP_TASK_FLAG_NOGROUP) == 0)
++		ialias_call (GOMP_taskgroup_end) ();
++	      return;
++	    }
++	  if (taskgroup)
+ 	    {
+-	      gomp_finish_task (tasks[i]);
+-	      free (tasks[i]);
++	      if (taskgroup->cancelled)
++		goto do_cancel;
++	      if (taskgroup->workshare
++		  && taskgroup->prev
++		  && taskgroup->prev->cancelled)
++		goto do_cancel;
+ 	    }
+-	  if ((flags & GOMP_TASK_FLAG_NOGROUP) == 0)
+-	    ialias_call (GOMP_taskgroup_end) ();
+-	  return;
+ 	}
+       if (taskgroup)
+ 	taskgroup->num_children += num_tasks;
+--- libgomp/parallel.c.jj	2018-04-25 09:40:31.926655587 +0200
++++ libgomp/parallel.c	2019-05-07 18:46:36.532109640 +0200
+@@ -123,7 +123,8 @@ void
+ GOMP_parallel_start (void (*fn) (void *), void *data, unsigned num_threads)
+ {
+   num_threads = gomp_resolve_num_threads (num_threads, 0);
+-  gomp_team_start (fn, data, num_threads, 0, gomp_new_team (num_threads));
++  gomp_team_start (fn, data, num_threads, 0, gomp_new_team (num_threads),
++		   NULL);
+ }
+ 
+ void
+@@ -161,14 +162,33 @@ GOMP_parallel_end (void)
+ ialias (GOMP_parallel_end)
+ 
+ void
+-GOMP_parallel (void (*fn) (void *), void *data, unsigned num_threads, unsigned int flags)
++GOMP_parallel (void (*fn) (void *), void *data, unsigned num_threads,
++	       unsigned int flags)
+ {
+   num_threads = gomp_resolve_num_threads (num_threads, 0);
+-  gomp_team_start (fn, data, num_threads, flags, gomp_new_team (num_threads));
++  gomp_team_start (fn, data, num_threads, flags, gomp_new_team (num_threads),
++		   NULL);
+   fn (data);
+   ialias_call (GOMP_parallel_end) ();
+ }
+ 
++unsigned  /* Same sequence as GOMP_parallel plus reduction registration; returns the resolved thread count.  */
++GOMP_parallel_reductions (void (*fn) (void *), void *data,
++			  unsigned num_threads, unsigned int flags)
++{
++  struct gomp_taskgroup *taskgroup;
++  num_threads = gomp_resolve_num_threads (num_threads, 0);
++  uintptr_t *rdata = *(uintptr_t **)data;  /* The first word of DATA is read as the reduction data pointer.  */
++  taskgroup = gomp_parallel_reduction_register (rdata, num_threads);
++  gomp_team_start (fn, data, num_threads, flags, gomp_new_team (num_threads),
++		   taskgroup);
++  fn (data);  /* The calling thread runs FN itself.  */
++  ialias_call (GOMP_parallel_end) ();
++  gomp_sem_destroy (&taskgroup->taskgroup_sem);  /* Release the taskgroup obtained above after GOMP_parallel_end.  */
++  free (taskgroup);
++  return num_threads;
++}
++
+ bool
+ GOMP_cancellation_point (int which)
+ {
+@@ -185,8 +205,15 @@ GOMP_cancellation_point (int which)
+     }
+   else if (which & GOMP_CANCEL_TASKGROUP)
+     {
+-      if (thr->task->taskgroup && thr->task->taskgroup->cancelled)
+-	return true;
++      if (thr->task->taskgroup)
++	{
++	  if (thr->task->taskgroup->cancelled)
++	    return true;
++	  if (thr->task->taskgroup->workshare
++	      && thr->task->taskgroup->prev
++	      && thr->task->taskgroup->prev->cancelled)
++	    return true;
++	}
+       /* FALLTHRU into the GOMP_CANCEL_PARALLEL case,
+ 	 as #pragma omp cancel parallel also cancels all explicit
+ 	 tasks.  */
+@@ -218,11 +245,17 @@ GOMP_cancel (int which, bool do_cancel)
+     }
+   else if (which & GOMP_CANCEL_TASKGROUP)
+     {
+-      if (thr->task->taskgroup && !thr->task->taskgroup->cancelled)
++      if (thr->task->taskgroup)
+ 	{
+-	  gomp_mutex_lock (&team->task_lock);
+-	  thr->task->taskgroup->cancelled = true;
+-	  gomp_mutex_unlock (&team->task_lock);
++	  struct gomp_taskgroup *taskgroup = thr->task->taskgroup;
++	  if (taskgroup->workshare && taskgroup->prev)
++	    taskgroup = taskgroup->prev;
++	  if (!taskgroup->cancelled)
++	    {
++	      gomp_mutex_lock (&team->task_lock);
++	      taskgroup->cancelled = true;
++	      gomp_mutex_unlock (&team->task_lock);
++	    }
+ 	}
+       return true;
+     }
+--- libgomp/oacc-plugin.h.jj	2018-04-25 09:40:31.322655307 +0200
++++ libgomp/oacc-plugin.h	2019-05-07 18:46:36.531109656 +0200
+@@ -29,5 +29,6 @@
+ 
+ extern void GOMP_PLUGIN_async_unmap_vars (void *, int);
+ extern void *GOMP_PLUGIN_acc_thread (void);
++extern int GOMP_PLUGIN_acc_default_dim (unsigned int);
+ 
+ #endif
+--- libgomp/target.c.jj	2018-04-25 09:40:31.912655580 +0200
++++ libgomp/target.c	2019-05-07 19:07:21.032306327 +0200
+@@ -180,16 +180,22 @@ gomp_device_copy (struct gomp_device_des
+ /* Infrastructure for coalescing adjacent or nearly adjacent (in device addresses)
+    host to device memory transfers.  */
+ 
++struct gomp_coalesce_chunk
++{
++  /* The starting and ending offset of a coalesced chunk of memory; END is START plus the length, so END - START is the chunk size.  */
++  size_t start, end;
++};
++
+ struct gomp_coalesce_buf
+ {
+   /* Buffer into which gomp_copy_host2dev will memcpy data and from which
+      it will be copied to the device.  */
+   void *buf;
+   struct target_mem_desc *tgt;
+-  /* Array with offsets, chunks[2 * i] is the starting offset and
+-     chunks[2 * i + 1] ending offset relative to tgt->tgt_start device address
++  /* Array with offsets, chunks[i].start is the starting offset and
++     chunks[i].end ending offset relative to tgt->tgt_start device address
+      of chunks which are to be copied to buf and later copied to device.  */
+-  size_t *chunks;
++  struct gomp_coalesce_chunk *chunks;
+   /* Number of chunks in chunks array, or -1 if coalesce buffering should not
+      be performed.  */
+   long chunk_cnt;
+@@ -222,14 +228,14 @@ gomp_coalesce_buf_add (struct gomp_coale
+     {
+       if (cbuf->chunk_cnt < 0)
+ 	return;
+-      if (start < cbuf->chunks[2 * cbuf->chunk_cnt - 1])
++      if (start < cbuf->chunks[cbuf->chunk_cnt - 1].end)
+ 	{
+ 	  cbuf->chunk_cnt = -1;
+ 	  return;
+ 	}
+-      if (start < cbuf->chunks[2 * cbuf->chunk_cnt - 1] + MAX_COALESCE_BUF_GAP)
++      if (start < cbuf->chunks[cbuf->chunk_cnt - 1].end + MAX_COALESCE_BUF_GAP)
+ 	{
+-	  cbuf->chunks[2 * cbuf->chunk_cnt - 1] = start + len;
++	  cbuf->chunks[cbuf->chunk_cnt - 1].end = start + len;
+ 	  cbuf->use_cnt++;
+ 	  return;
+ 	}
+@@ -239,8 +245,8 @@ gomp_coalesce_buf_add (struct gomp_coale
+       if (cbuf->use_cnt == 1)
+ 	cbuf->chunk_cnt--;
+     }
+-  cbuf->chunks[2 * cbuf->chunk_cnt] = start;
+-  cbuf->chunks[2 * cbuf->chunk_cnt + 1] = start + len;
++  cbuf->chunks[cbuf->chunk_cnt].start = start;
++  cbuf->chunks[cbuf->chunk_cnt].end = start + len;
+   cbuf->chunk_cnt++;
+   cbuf->use_cnt = 1;
+ }
+@@ -271,20 +277,20 @@ gomp_copy_host2dev (struct gomp_device_d
+   if (cbuf)
+     {
+       uintptr_t doff = (uintptr_t) d - cbuf->tgt->tgt_start;
+-      if (doff < cbuf->chunks[2 * cbuf->chunk_cnt - 1])
++      if (doff < cbuf->chunks[cbuf->chunk_cnt - 1].end)
+ 	{
+ 	  long first = 0;
+ 	  long last = cbuf->chunk_cnt - 1;
+ 	  while (first <= last)
+ 	    {
+ 	      long middle = (first + last) >> 1;
+-	      if (cbuf->chunks[2 * middle + 1] <= doff)
++	      if (cbuf->chunks[middle].end <= doff)
+ 		first = middle + 1;
+-	      else if (cbuf->chunks[2 * middle] <= doff)
++	      else if (cbuf->chunks[middle].start <= doff)
+ 		{
+-		  if (doff + sz > cbuf->chunks[2 * middle + 1])
++		  if (doff + sz > cbuf->chunks[middle].end)
+ 		    gomp_fatal ("internal libgomp cbuf error");
+-		  memcpy ((char *) cbuf->buf + (doff - cbuf->chunks[0]),
++		  memcpy ((char *) cbuf->buf + (doff - cbuf->chunks[0].start),
+ 			  h, sz);
+ 		  return;
+ 		}
+@@ -510,8 +516,8 @@ gomp_map_vars (struct gomp_device_descr
+   cbuf.buf = NULL;
+   if (mapnum > 1 || pragma_kind == GOMP_MAP_VARS_TARGET)
+     {
+-      cbuf.chunks
+-	= (size_t *) gomp_alloca ((2 * mapnum + 2) * sizeof (size_t));
++      size_t chunks_size = (mapnum + 1) * sizeof (struct gomp_coalesce_chunk);
++      cbuf.chunks = (struct gomp_coalesce_chunk *) gomp_alloca (chunks_size);
+       cbuf.chunk_cnt = 0;
+     }
+   if (pragma_kind == GOMP_MAP_VARS_TARGET)
+@@ -521,8 +527,8 @@ gomp_map_vars (struct gomp_device_descr
+       tgt_size = mapnum * sizeof (void *);
+       cbuf.chunk_cnt = 1;
+       cbuf.use_cnt = 1 + (mapnum > 1);
+-      cbuf.chunks[0] = 0;
+-      cbuf.chunks[1] = tgt_size;
++      cbuf.chunks[0].start = 0;
++      cbuf.chunks[0].end = tgt_size;
+     }
+ 
+   gomp_mutex_lock (&devicep->lock);
+@@ -707,7 +713,7 @@ gomp_map_vars (struct gomp_device_descr
+       if (cbuf.chunk_cnt > 0)
+ 	{
+ 	  cbuf.buf
+-	    = malloc (cbuf.chunks[2 * cbuf.chunk_cnt - 1] - cbuf.chunks[0]);
++	    = malloc (cbuf.chunks[cbuf.chunk_cnt - 1].end - cbuf.chunks[0].start);
+ 	  if (cbuf.buf)
+ 	    {
+ 	      cbuf.tgt = tgt;
+@@ -859,6 +865,7 @@ gomp_map_vars (struct gomp_device_descr
+ 		tgt->list[i].offset = 0;
+ 		tgt->list[i].length = k->host_end - k->host_start;
+ 		k->refcount = 1;
++		k->dynamic_refcount = 0;
+ 		tgt->refcount++;
+ 		array->left = NULL;
+ 		array->right = NULL;
+@@ -956,9 +963,10 @@ gomp_map_vars (struct gomp_device_descr
+ 		    /* Set link pointer on target to the device address of the
+ 		       mapped object.  */
+ 		    void *tgt_addr = (void *) (tgt->tgt_start + k->tgt_offset);
+-		    devicep->host2dev_func (devicep->target_id,
+-					    (void *) n->tgt_offset,
+-					    &tgt_addr, sizeof (void *));
++		    /* We intentionally do not use coalescing here, as it's not
++		       data allocated by the current call to this function.  */
++		    gomp_copy_host2dev (devicep, (void *) n->tgt_offset,
++					&tgt_addr, sizeof (void *), NULL);
+ 		  }
+ 		array++;
+ 	      }
+@@ -981,10 +989,14 @@ gomp_map_vars (struct gomp_device_descr
+     {
+       long c = 0;
+       for (c = 0; c < cbuf.chunk_cnt; ++c)
+-	gomp_copy_host2dev (devicep, (void *) (tgt->tgt_start + cbuf.chunks[2 * c]),
+-			    (char *) cbuf.buf + (cbuf.chunks[2 * c] - cbuf.chunks[0]),
+-			    cbuf.chunks[2 * c + 1] - cbuf.chunks[2 * c], NULL);
++	gomp_copy_host2dev (devicep,
++			    (void *) (tgt->tgt_start + cbuf.chunks[c].start),
++			    (char *) cbuf.buf + (cbuf.chunks[c].start
++						 - cbuf.chunks[0].start),
++			    cbuf.chunks[c].end - cbuf.chunks[c].start, NULL);
+       free (cbuf.buf);
++      cbuf.buf = NULL;
++      cbufp = NULL;
+     }
+ 
+   /* If the variable from "omp target enter data" map-list was already mapped,
+@@ -1011,6 +1023,23 @@ gomp_unmap_tgt (struct target_mem_desc *
+   free (tgt);
+ }
+ 
++attribute_hidden bool  /* Returns true iff K's target_mem_desc was unmapped by this call.  */
++gomp_remove_var (struct gomp_device_descr *devicep, splay_tree_key k)
++{
++  bool is_tgt_unmapped = false;
++  splay_tree_remove (&devicep->mem_map, k);  /* Drop K from DEVICEP's mem_map.  */
++  if (k->link_key)
++    splay_tree_insert (&devicep->mem_map, (splay_tree_node) k->link_key);  /* Put K's link key into the map in its place.  */
++  if (k->tgt->refcount > 1)
++    k->tgt->refcount--;  /* Other references remain: only decrement.  */
++  else
++    {
++      is_tgt_unmapped = true;
++      gomp_unmap_tgt (k->tgt);  /* Refcount was not above 1: unmap the whole descriptor.  */
++    }
++  return is_tgt_unmapped;
++}
++
+ /* Unmap variables described by TGT.  If DO_COPYFROM is true, copy relevant
+    variables back from device to host: if it is false, it is assumed that this
+    has been done already.  */
+@@ -1059,16 +1088,7 @@ gomp_unmap_vars (struct target_mem_desc
+ 				      + tgt->list[i].offset),
+ 			    tgt->list[i].length);
+       if (do_unmap)
+-	{
+-	  splay_tree_remove (&devicep->mem_map, k);
+-	  if (k->link_key)
+-	    splay_tree_insert (&devicep->mem_map,
+-			       (splay_tree_node) k->link_key);
+-	  if (k->tgt->refcount > 1)
+-	    k->tgt->refcount--;
+-	  else
+-	    gomp_unmap_tgt (k->tgt);
+-	}
++	gomp_remove_var (devicep, k);
+     }
+ 
+   if (tgt->refcount > 1)
+@@ -1298,17 +1318,7 @@ gomp_unload_image_from_device (struct go
+       else
+ 	{
+ 	  splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &k);
+-	  splay_tree_remove (&devicep->mem_map, n);
+-	  if (n->link_key)
+-	    {
+-	      if (n->tgt->refcount > 1)
+-		n->tgt->refcount--;
+-	      else
+-		{
+-		  is_tgt_unmapped = true;
+-		  gomp_unmap_tgt (n->tgt);
+-		}
+-	    }
++	  is_tgt_unmapped = gomp_remove_var (devicep, n);
+ 	}
+     }
+ 
+@@ -1855,11 +1865,20 @@ GOMP_target_update_ext (int device, size
+ 	      struct gomp_team *team = thr->ts.team;
+ 	      /* If parallel or taskgroup has been cancelled, don't start new
+ 		 tasks.  */
+-	      if (team
+-		  && (gomp_team_barrier_cancelled (&team->barrier)
+-		      || (thr->task->taskgroup
+-			  && thr->task->taskgroup->cancelled)))
+-		return;
++	      if (__builtin_expect (gomp_cancel_var, 0) && team)
++		{
++		  if (gomp_team_barrier_cancelled (&team->barrier))
++		    return;
++		  if (thr->task->taskgroup)
++		    {
++		      if (thr->task->taskgroup->cancelled)
++			return;
++		      if (thr->task->taskgroup->workshare
++			  && thr->task->taskgroup->prev
++			  && thr->task->taskgroup->prev->cancelled)
++			return;
++		    }
++		}
+ 
+ 	      gomp_task_maybe_wait_for_dependencies (depend);
+ 	    }
+@@ -1874,10 +1893,20 @@ GOMP_target_update_ext (int device, size
+   struct gomp_thread *thr = gomp_thread ();
+   struct gomp_team *team = thr->ts.team;
+   /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
+-  if (team
+-      && (gomp_team_barrier_cancelled (&team->barrier)
+-	  || (thr->task->taskgroup && thr->task->taskgroup->cancelled)))
+-    return;
++  if (__builtin_expect (gomp_cancel_var, 0) && team)
++    {
++      if (gomp_team_barrier_cancelled (&team->barrier))
++	return;
++      if (thr->task->taskgroup)
++	{
++	  if (thr->task->taskgroup->cancelled)
++	    return;
++	  if (thr->task->taskgroup->workshare
++	      && thr->task->taskgroup->prev
++	      && thr->task->taskgroup->prev->cancelled)
++	    return;
++	}
++    }
+ 
+   gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, true);
+ }
+@@ -1986,11 +2015,20 @@ GOMP_target_enter_exit_data (int device,
+ 	      struct gomp_team *team = thr->ts.team;
+ 	      /* If parallel or taskgroup has been cancelled, don't start new
+ 		 tasks.  */
+-	      if (team
+-		  && (gomp_team_barrier_cancelled (&team->barrier)
+-		      || (thr->task->taskgroup
+-			  && thr->task->taskgroup->cancelled)))
+-		return;
++	      if (__builtin_expect (gomp_cancel_var, 0) && team)
++		{
++		  if (gomp_team_barrier_cancelled (&team->barrier))
++		    return;
++		  if (thr->task->taskgroup)
++		    {
++		      if (thr->task->taskgroup->cancelled)
++			return;
++		      if (thr->task->taskgroup->workshare
++			  && thr->task->taskgroup->prev
++			  && thr->task->taskgroup->prev->cancelled)
++			return;
++		    }
++		}
+ 
+ 	      gomp_task_maybe_wait_for_dependencies (depend);
+ 	    }
+@@ -2005,10 +2043,20 @@ GOMP_target_enter_exit_data (int device,
+   struct gomp_thread *thr = gomp_thread ();
+   struct gomp_team *team = thr->ts.team;
+   /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
+-  if (team
+-      && (gomp_team_barrier_cancelled (&team->barrier)
+-	  || (thr->task->taskgroup && thr->task->taskgroup->cancelled)))
+-    return;
++  if (__builtin_expect (gomp_cancel_var, 0) && team)
++    {
++      if (gomp_team_barrier_cancelled (&team->barrier))
++	return;
++      if (thr->task->taskgroup)
++	{
++	  if (thr->task->taskgroup->cancelled)
++	    return;
++	  if (thr->task->taskgroup->workshare
++	      && thr->task->taskgroup->prev
++	      && thr->task->taskgroup->prev->cancelled)
++	    return;
++	}
++    }
+ 
+   size_t i;
+   if ((flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0)
+@@ -2197,8 +2245,9 @@ omp_target_is_present (void *ptr, int de
+ }
+ 
+ int
+-omp_target_memcpy (void *dst, void *src, size_t length, size_t dst_offset,
+-		   size_t src_offset, int dst_device_num, int src_device_num)
++omp_target_memcpy (void *dst, void *src, size_t length,
++		   size_t dst_offset, size_t src_offset, int dst_device_num,
++		   int src_device_num)
+ {
+   struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
+   bool ret;
+@@ -2287,21 +2336,25 @@ omp_target_memcpy_rect_worker (void *dst
+ 	return EINVAL;
+       if (dst_devicep == NULL && src_devicep == NULL)
+ 	{
+-	  memcpy ((char *) dst + dst_off, (char *) src + src_off, length);
++	  memcpy ((char *) dst + dst_off, (char *) src + src_off,
++		  length);
+ 	  ret = 1;
+ 	}
+       else if (src_devicep == NULL)
+ 	ret = dst_devicep->host2dev_func (dst_devicep->target_id,
+ 					  (char *) dst + dst_off,
+-					  (char *) src + src_off, length);
++					  (char *) src + src_off,
++					  length);
+       else if (dst_devicep == NULL)
+ 	ret = src_devicep->dev2host_func (src_devicep->target_id,
+ 					  (char *) dst + dst_off,
+-					  (char *) src + src_off, length);
++					  (char *) src + src_off,
++					  length);
+       else if (src_devicep == dst_devicep)
+ 	ret = src_devicep->dev2dev_func (src_devicep->target_id,
+ 					 (char *) dst + dst_off,
+-					 (char *) src + src_off, length);
++					 (char *) src + src_off,
++					 length);
+       else
+ 	ret = 0;
+       return ret ? 0 : EINVAL;
+@@ -2396,8 +2449,8 @@ omp_target_memcpy_rect (void *dst, void
+ }
+ 
+ int
+-omp_target_associate_ptr (void *host_ptr, void *device_ptr, size_t size,
+-			  size_t device_offset, int device_num)
++omp_target_associate_ptr (void *host_ptr, void *device_ptr,
++			  size_t size, size_t device_offset, int device_num)
+ {
+   if (device_num == GOMP_DEVICE_HOST_FALLBACK)
+     return EINVAL;
+@@ -2499,6 +2552,31 @@ omp_target_disassociate_ptr (void *ptr,
+   return ret;
+ }
+ 
++int  /* Host fallback: result of gomp_pause_host; out-of-range device: -1; otherwise 0.  */
++omp_pause_resource (omp_pause_resource_t kind, int device_num)
++{
++  (void) kind;  /* KIND is not examined.  */
++  if (device_num == GOMP_DEVICE_HOST_FALLBACK)
++    return gomp_pause_host ();
++  if (device_num < 0 || device_num >= gomp_get_num_devices ())
++    return -1;  /* DEVICE_NUM does not name an existing device.  */
++  /* Do nothing for target devices for now.  */
++  return 0;
++}
++
++int  /* Returns -1 if gomp_pause_host reports nonzero, otherwise 0.  */
++omp_pause_resource_all (omp_pause_resource_t kind)
++{
++  (void) kind;  /* KIND is not examined.  */
++  if (gomp_pause_host ())
++    return -1;
++  /* Do nothing for target devices for now.  */
++  return 0;
++}
++
++ialias (omp_pause_resource)
++ialias (omp_pause_resource_all)
++
+ #ifdef PLUGIN_SUPPORT
+ 
+ /* This function tries to load a plugin for DEVICE.  Name of plugin is passed
+@@ -2632,9 +2710,9 @@ gomp_target_fini (void)
+     }
+ }
+ 
+-/* This function initializes the runtime needed for offloading.
+-   It parses the list of offload targets and tries to load the plugins for
+-   these targets.  On return, the variables NUM_DEVICES and NUM_DEVICES_OPENMP
++/* This function initializes the runtime for offloading.
++   It parses the list of offload plugins, and tries to load these.
++   On return, the variables NUM_DEVICES and NUM_DEVICES_OPENMP
+    will be set, and the array DEVICES initialized, containing descriptors for
+    corresponding devices, first the GOMP_OFFLOAD_CAP_OPENMP_400 ones, follows
+    by the others.  */
+@@ -2651,7 +2729,7 @@ gomp_target_init (void)
+   num_devices = 0;
+   devices = NULL;
+ 
+-  cur = OFFLOAD_TARGETS;
++  cur = OFFLOAD_PLUGINS;
+   if (*cur)
+     do
+       {
+--- libgomp/ordered.c.jj	2018-04-25 09:40:31.926655587 +0200
++++ libgomp/ordered.c	2019-05-07 18:46:36.532109640 +0200
+@@ -259,7 +259,8 @@ GOMP_ordered_end (void)
+ #define MAX_COLLAPSED_BITS (__SIZEOF_LONG__ * __CHAR_BIT__)
+ 
+ void
+-gomp_doacross_init (unsigned ncounts, long *counts, long chunk_size)
++gomp_doacross_init (unsigned ncounts, long *counts, long chunk_size,
++		    size_t extra)
+ {
+   struct gomp_thread *thr = gomp_thread ();
+   struct gomp_team *team = thr->ts.team;
+@@ -269,13 +270,24 @@ gomp_doacross_init (unsigned ncounts, lo
+   struct gomp_doacross_work_share *doacross;
+ 
+   if (team == NULL || team->nthreads == 1)
+-    return;
++    {
++    empty:
++      if (!extra)
++	ws->doacross = NULL;
++      else
++	{
++	  doacross = gomp_malloc_cleared (sizeof (*doacross) + extra);
++	  doacross->extra = (void *) (doacross + 1);
++	  ws->doacross = doacross;
++	}
++      return;
++    }
+ 
+   for (i = 0; i < ncounts; i++)
+     {
+       /* If any count is 0, GOMP_doacross_{post,wait} can't be called.  */
+       if (counts[i] == 0)
+-	return;
++	goto empty;
+ 
+       if (num_bits <= MAX_COLLAPSED_BITS)
+ 	{
+@@ -314,7 +326,7 @@ gomp_doacross_init (unsigned ncounts, lo
+   elt_sz = (elt_sz + 63) & ~63UL;
+ 
+   doacross = gomp_malloc (sizeof (*doacross) + 63 + num_ents * elt_sz
+-			  + shift_sz);
++			  + shift_sz + extra);
+   doacross->chunk_size = chunk_size;
+   doacross->elt_sz = elt_sz;
+   doacross->ncounts = ncounts;
+@@ -322,6 +334,13 @@ gomp_doacross_init (unsigned ncounts, lo
+   doacross->array = (unsigned char *)
+ 		    ((((uintptr_t) (doacross + 1)) + 63 + shift_sz)
+ 		     & ~(uintptr_t) 63);
++  if (extra)
++    {
++      doacross->extra = doacross->array + num_ents * elt_sz;
++      memset (doacross->extra, '\0', extra);
++    }
++  else
++    doacross->extra = NULL;
+   if (num_bits <= MAX_COLLAPSED_BITS)
+     {
+       unsigned int shift_count = 0;
+@@ -360,7 +379,8 @@ GOMP_doacross_post (long *counts)
+   unsigned long ent;
+   unsigned int i;
+ 
+-  if (__builtin_expect (doacross == NULL, 0))
++  if (__builtin_expect (doacross == NULL, 0)
++      || __builtin_expect (doacross->array == NULL, 0))
+     {
+       __sync_synchronize ();
+       return;
+@@ -411,7 +431,8 @@ GOMP_doacross_wait (long first, ...)
+   unsigned long ent;
+   unsigned int i;
+ 
+-  if (__builtin_expect (doacross == NULL, 0))
++  if (__builtin_expect (doacross == NULL, 0)
++      || __builtin_expect (doacross->array == NULL, 0))
+     {
+       __sync_synchronize ();
+       return;
+@@ -488,7 +509,8 @@ GOMP_doacross_wait (long first, ...)
+ typedef unsigned long long gomp_ull;
+ 
+ void
+-gomp_doacross_ull_init (unsigned ncounts, gomp_ull *counts, gomp_ull chunk_size)
++gomp_doacross_ull_init (unsigned ncounts, gomp_ull *counts,
++			gomp_ull chunk_size, size_t extra)
+ {
+   struct gomp_thread *thr = gomp_thread ();
+   struct gomp_team *team = thr->ts.team;
+@@ -498,13 +520,24 @@ gomp_doacross_ull_init (unsigned ncounts
+   struct gomp_doacross_work_share *doacross;
+ 
+   if (team == NULL || team->nthreads == 1)
+-    return;
++    {
++    empty:
++      if (!extra)
++	ws->doacross = NULL;
++      else
++	{
++	  doacross = gomp_malloc_cleared (sizeof (*doacross) + extra);
++	  doacross->extra = (void *) (doacross + 1);
++	  ws->doacross = doacross;
++	}
++      return;
++    }
+ 
+   for (i = 0; i < ncounts; i++)
+     {
+       /* If any count is 0, GOMP_doacross_{post,wait} can't be called.  */
+       if (counts[i] == 0)
+-	return;
++	goto empty;
+ 
+       if (num_bits <= MAX_COLLAPSED_BITS)
+ 	{
+@@ -557,6 +590,13 @@ gomp_doacross_ull_init (unsigned ncounts
+   doacross->array = (unsigned char *)
+ 		    ((((uintptr_t) (doacross + 1)) + 63 + shift_sz)
+ 		     & ~(uintptr_t) 63);
++  if (extra)
++    {
++      doacross->extra = doacross->array + num_ents * elt_sz;
++      memset (doacross->extra, '\0', extra);
++    }
++  else
++    doacross->extra = NULL;
+   if (num_bits <= MAX_COLLAPSED_BITS)
+     {
+       unsigned int shift_count = 0;
+@@ -595,7 +635,8 @@ GOMP_doacross_ull_post (gomp_ull *counts
+   unsigned long ent;
+   unsigned int i;
+ 
+-  if (__builtin_expect (doacross == NULL, 0))
++  if (__builtin_expect (doacross == NULL, 0)
++      || __builtin_expect (doacross->array == NULL, 0))
+     {
+       __sync_synchronize ();
+       return;
+@@ -667,7 +708,8 @@ GOMP_doacross_ull_wait (gomp_ull first,
+   unsigned long ent;
+   unsigned int i;
+ 
+-  if (__builtin_expect (doacross == NULL, 0))
++  if (__builtin_expect (doacross == NULL, 0)
++      || __builtin_expect (doacross->array == NULL, 0))
+     {
+       __sync_synchronize ();
+       return;
+--- libgomp/alloc.c.jj	2018-04-25 09:40:31.926655587 +0200
++++ libgomp/alloc.c	2019-05-07 18:46:36.336112770 +0200
+@@ -57,3 +57,50 @@ gomp_realloc (void *old, size_t size)
+     gomp_fatal ("Out of memory allocating %lu bytes", (unsigned long) size);
+   return ret;
+ }
++
++void *  /* Allocate SIZE bytes aligned to AL; calls gomp_fatal if no memory was obtained.  */
++gomp_aligned_alloc (size_t al, size_t size)
++{
++  void *ret;
++  if (al < sizeof (void *))
++    al = sizeof (void *);  /* Use at least pointer alignment.  */
++#ifdef HAVE_ALIGNED_ALLOC
++  ret = aligned_alloc (al, size);
++#elif defined(HAVE__ALIGNED_MALLOC)
++  ret = _aligned_malloc (size, al);  /* Note the argument order: size first, then alignment.  */
++#elif defined(HAVE_POSIX_MEMALIGN)
++  if (posix_memalign (&ret, al, size) != 0)
++    ret = NULL;
++#elif defined(HAVE_MEMALIGN)
++  {
++    extern void *memalign (size_t, size_t);
++    ret = memalign (al, size);
++  }
++#else
++  ret = NULL;
++  if ((al & (al - 1)) == 0 && size)  /* Fallback only for power-of-two AL and nonzero SIZE.  */
++    {
++      void *p = malloc (size + al);  /* AL extra bytes: alignment slack plus room for the saved pointer.  */
++      if (p)
++	{
++	  void *ap = (void *) (((uintptr_t) p + al) & -al);  /* Round P + AL down to a multiple of AL.  */
++	  ((void **) ap)[-1] = p;  /* Saved so gomp_aligned_free can free the malloc pointer.  */
++	  ret = ap;
++	}
++    }
++#endif
++  if (ret == NULL)
++    gomp_fatal ("Out of memory allocating %lu bytes", (unsigned long) size);
++  return ret;
++}
++
++void  /* Release PTR; the fallback branch matches gomp_aligned_alloc's malloc-based path.  */
++gomp_aligned_free (void *ptr)
++{
++#ifdef GOMP_HAVE_EFFICIENT_ALIGNED_ALLOC
++  free (ptr);  /* NOTE(review): _aligned_malloc memory needs _aligned_free; confirm this macro is not set in that case.  */
++#else
++  if (ptr)
++    free (((void **) ptr)[-1]);  /* The pointer malloc returned is stored just below PTR.  */
++#endif
++}
+--- libgomp/configure.ac.jj	2018-04-25 09:40:31.321655307 +0200
++++ libgomp/configure.ac	2019-05-07 18:46:36.471110614 +0200
+@@ -219,6 +219,7 @@ m4_include([plugin/configfrag.ac])
+ 
+ # Check for functions needed.
+ AC_CHECK_FUNCS(getloadavg clock_gettime strtoull)
++AC_CHECK_FUNCS(aligned_alloc posix_memalign memalign _aligned_malloc)
+ 
+ # Check for broken semaphore implementation on darwin.
+ # sem_init returns: sem_init error: Function not implemented.
+@@ -266,6 +267,41 @@ if test $ac_cv_func_clock_gettime = no;
+ 	       [Define to 1 if you have the `clock_gettime' function.])])
+ fi
+ 
++# Check for uname and for the nodename field of struct utsname.
++AC_COMPILE_IFELSE(
++ [AC_LANG_PROGRAM(
++  [#include <string.h>
++   #include <stdlib.h>
++   #include <sys/utsname.h>],
++  [struct utsname buf;
++   volatile size_t len = 0;
++   if (!uname (&buf))
++     len = strlen (buf.nodename);])],
++  AC_DEFINE(HAVE_UNAME, 1,
++[	Define if uname is supported and struct utsname has nodename field.]))
++
++# Check for gethostname; this only tests that a call to it compiles.
++AC_COMPILE_IFELSE(
++ [AC_LANG_PROGRAM(
++  [#include <unistd.h>],
++  [
++changequote(,)dnl
++   char buf[256];
++   if (gethostname (buf, sizeof (buf) - 1) == 0)
++     buf[255] = '\0';
++changequote([,])dnl
++  ])],
++  AC_DEFINE(HAVE_GETHOSTNAME, 1,
++[	Define if gethostname is supported.]))
++
++# Check for getpid; this only tests that a call to it compiles.
++AC_COMPILE_IFELSE(
++ [AC_LANG_PROGRAM(
++  [#include <unistd.h>],
++  [int pid = getpid ();])],
++  AC_DEFINE(HAVE_GETPID, 1,
++[	Define if getpid is supported.]))
++
+ # See if we support thread-local storage.
+ GCC_CHECK_TLS
+ 
+--- libgomp/icv.c.jj	2018-04-25 09:40:31.870655561 +0200
++++ libgomp/icv.c	2019-05-07 18:46:36.501110134 +0200
+@@ -69,7 +69,7 @@ void
+ omp_set_schedule (omp_sched_t kind, int chunk_size)
+ {
+   struct gomp_task_icv *icv = gomp_icv (true);
+-  switch (kind)
++  switch (kind & ~omp_sched_monotonic)
+     {
+     case omp_sched_static:
+       if (chunk_size < 1)
+--- libgomp/configure.jj	2018-04-25 09:40:31.913655581 +0200
++++ libgomp/configure	2019-05-07 18:47:37.961128420 +0200
+@@ -636,6 +636,8 @@ PLUGIN_NVPTX_FALSE
+ PLUGIN_NVPTX_TRUE
+ offload_additional_lib_paths
+ offload_additional_options
++offload_targets
++offload_plugins
+ PLUGIN_HSA_LIBS
+ PLUGIN_HSA_LDFLAGS
+ PLUGIN_HSA_CPPFLAGS
+@@ -648,7 +650,6 @@ PLUGIN_NVPTX_CPPFLAGS
+ PLUGIN_NVPTX
+ CUDA_DRIVER_LIB
+ CUDA_DRIVER_INCLUDE
+-offload_targets
+ libtool_VERSION
+ ac_ct_FC
+ FCFLAGS
+@@ -11157,7 +11158,7 @@ else
+   lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
+   lt_status=$lt_dlunknown
+   cat > conftest.$ac_ext <<_LT_EOF
+-#line 11160 "configure"
++#line 11161 "configure"
+ #include "confdefs.h"
+ 
+ #if HAVE_DLFCN_H
+@@ -11263,7 +11264,7 @@ else
+   lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
+   lt_status=$lt_dlunknown
+   cat > conftest.$ac_ext <<_LT_EOF
+-#line 11266 "configure"
++#line 11267 "configure"
+ #include "confdefs.h"
+ 
+ #if HAVE_DLFCN_H
+@@ -15167,8 +15168,6 @@ fi
+ # see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+ # <http://www.gnu.org/licenses/>.
+ 
+-offload_targets=
+-
+ plugin_support=yes
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking for dlsym in -ldl" >&5
+ $as_echo_n "checking for dlsym in -ldl... " >&6; }
+@@ -15302,7 +15301,11 @@ if test "${with_cuda_driver_lib+set}" =
+ fi
+ 
+ case "x$with_cuda_driver" in
+-  x | xno) ;;
++  x) ;;
++  xno)
++    CUDA_DRIVER_INCLUDE=no
++    CUDA_DRIVER_LIB=no
++    ;;
+   *) CUDA_DRIVER_INCLUDE=$with_cuda_driver/include
+      CUDA_DRIVER_LIB=$with_cuda_driver/lib
+      ;;
+@@ -15313,10 +15316,12 @@ fi
+ if test "x$with_cuda_driver_lib" != x; then
+   CUDA_DRIVER_LIB=$with_cuda_driver_lib
+ fi
+-if test "x$CUDA_DRIVER_INCLUDE" != x; then
++if test "x$CUDA_DRIVER_INCLUDE" != x \
++   && test "x$CUDA_DRIVER_INCLUDE" != xno; then
+   CUDA_DRIVER_CPPFLAGS=-I$CUDA_DRIVER_INCLUDE
+ fi
+-if test "x$CUDA_DRIVER_LIB" != x; then
++if test "x$CUDA_DRIVER_LIB" != x \
++   && test "x$CUDA_DRIVER_LIB" != xno; then
+   CUDA_DRIVER_LDFLAGS=-L$CUDA_DRIVER_LIB
+ fi
+ 
+@@ -15383,7 +15388,13 @@ PLUGIN_HSA_LIBS=
+ 
+ 
+ 
+-# Get offload targets and path to install tree of offloading compiler.
++# Parse '--enable-offload-targets', figure out the corresponding libgomp
++# plugins, and configure to find the corresponding offload compilers.
++# 'offload_plugins' and 'offload_targets' will be populated in the same order.
++offload_plugins=
++offload_targets=
++
++
+ offload_additional_options=
+ offload_additional_lib_paths=
+ 
+@@ -15403,10 +15403,10 @@ if test x"$enable_offload_targets" != x;
+   for tgt in `echo $enable_offload_targets | sed -e 's#,# #g'`; do
+     tgt_dir=`echo $tgt | grep '=' | sed 's/.*=//'`
+     tgt=`echo $tgt | sed 's/=.*//'`
+-    tgt_name=
++    tgt_plugin=
+     case $tgt in
+       *-intelmic-* | *-intelmicemul-*)
+-	tgt_name=intelmic
++	tgt_plugin=intelmic
+ 	;;
+       nvptx*)
+ 	case "${target}" in
+@@ -15418,19 +15418,21 @@ if test x"$enable_offload_targets" != x;
+ 		PLUGIN_NVPTX=0
+ 		;;
+ 	      *)
+-		tgt_name=nvptx
++		tgt_plugin=nvptx
+ 		PLUGIN_NVPTX=$tgt
+-		PLUGIN_NVPTX_CPPFLAGS=$CUDA_DRIVER_CPPFLAGS
+-		PLUGIN_NVPTX_LDFLAGS=$CUDA_DRIVER_LDFLAGS
+-		PLUGIN_NVPTX_LIBS='-lcuda'
++		if test "x$CUDA_DRIVER_INCLUDE" != xno \
++		   && test "x$CUDA_DRIVER_LIB" != xno; then
++		  PLUGIN_NVPTX_CPPFLAGS=$CUDA_DRIVER_CPPFLAGS
++		  PLUGIN_NVPTX_LDFLAGS=$CUDA_DRIVER_LDFLAGS
++		  PLUGIN_NVPTX_LIBS='-lcuda'
+ 
+-		PLUGIN_NVPTX_save_CPPFLAGS=$CPPFLAGS
+-		CPPFLAGS="$PLUGIN_NVPTX_CPPFLAGS $CPPFLAGS"
+-		PLUGIN_NVPTX_save_LDFLAGS=$LDFLAGS
+-		LDFLAGS="$PLUGIN_NVPTX_LDFLAGS $LDFLAGS"
+-		PLUGIN_NVPTX_save_LIBS=$LIBS
+-		LIBS="$PLUGIN_NVPTX_LIBS $LIBS"
+-		cat confdefs.h - <<_ACEOF >conftest.$ac_ext
++		  PLUGIN_NVPTX_save_CPPFLAGS=$CPPFLAGS
++		  CPPFLAGS="$PLUGIN_NVPTX_CPPFLAGS $CPPFLAGS"
++		  PLUGIN_NVPTX_save_LDFLAGS=$LDFLAGS
++		  LDFLAGS="$PLUGIN_NVPTX_LDFLAGS $LDFLAGS"
++		  PLUGIN_NVPTX_save_LIBS=$LIBS
++		  LIBS="$PLUGIN_NVPTX_LIBS $LIBS"
++		  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+ /* end confdefs.h.  */
+ #include "cuda.h"
+ int
+@@ -15446,13 +15448,16 @@ if ac_fn_c_try_link "$LINENO"; then :
+ fi
+ rm -f core conftest.err conftest.$ac_objext \
+     conftest$ac_exeext conftest.$ac_ext
+-		CPPFLAGS=$PLUGIN_NVPTX_save_CPPFLAGS
+-		LDFLAGS=$PLUGIN_NVPTX_save_LDFLAGS
+-		LIBS=$PLUGIN_NVPTX_save_LIBS
++		  CPPFLAGS=$PLUGIN_NVPTX_save_CPPFLAGS
++		  LDFLAGS=$PLUGIN_NVPTX_save_LDFLAGS
++		  LIBS=$PLUGIN_NVPTX_save_LIBS
++		fi
+ 		case $PLUGIN_NVPTX in
+ 		  nvptx*)
+-		    if test "x$CUDA_DRIVER_INCLUDE" = x \
+-		       && test "x$CUDA_DRIVER_LIB" = x; then
++		    if (test "x$CUDA_DRIVER_INCLUDE" = x \
++			|| test "x$CUDA_DRIVER_INCLUDE" = xno) \
++		       && (test "x$CUDA_DRIVER_LIB" = x \
++			   || test "x$CUDA_DRIVER_LIB" = xno); then
+ 		      PLUGIN_NVPTX=1
+ 		      PLUGIN_NVPTX_CPPFLAGS='-I$(srcdir)/plugin/cuda'
+ 		      PLUGIN_NVPTX_LIBS='-ldl'
+@@ -15452,7 +15468,7 @@ rm -f core conftest.err conftest.$ac_obj
+ 	        PLUGIN_HSA=0
+ 		;;
+ 	      *)
+-	        tgt_name=hsa
++		tgt_plugin=hsa
+ 	        PLUGIN_HSA=$tgt
+ 	        PLUGIN_HSA_CPPFLAGS=$HSA_RUNTIME_CPPFLAGS
+ 	        PLUGIN_HSA_LDFLAGS="$HSA_RUNTIME_LDFLAGS"
+@@ -15470,7 +15486,7 @@ rm -f core conftest.err conftest.$ac_obj
+ 	        LDFLAGS=$PLUGIN_HSA_save_LDFLAGS
+ 	        LIBS=$PLUGIN_HSA_save_LIBS
+ 	        case $PLUGIN_HSA in
+-	          hsa*)
++		  hsa*)
+ 	            HSA_PLUGIN=0
+ 	            as_fn_error "HSA run-time package required for HSA support" "$LINENO" 5
+ 	            ;;
+@@ -15487,16 +15503,19 @@ rm -f core conftest.err conftest.$ac_obj
+ 	as_fn_error "unknown offload target specified" "$LINENO" 5
+ 	;;
+     esac
+-    if test x"$tgt_name" = x; then
+-      # Don't configure libgomp for this offloading target if we don't build
+-      # the corresponding plugin.
++    if test x"$tgt_plugin" = x; then
++      # Not configuring libgomp for this offload target if we're not building
++      # the corresponding offload plugin.
+       continue
+-    elif test x"$offload_targets" = x; then
+-      offload_targets=$tgt_name
++    elif test x"$offload_plugins" = x; then
++      offload_plugins=$tgt_plugin
++      offload_targets=$tgt
+     else
+-      offload_targets=$offload_targets,$tgt_name
++      offload_plugins=$offload_plugins,$tgt_plugin
++      offload_targets=$offload_targets,$tgt
+     fi
+-    if test "$tgt_name" = hsa; then
++    # Configure additional search paths.
++    if test "$tgt_plugin" = hsa; then
+       # Offloading compilation is all handled by the target compiler.
+       :
+     elif test x"$tgt_dir" != x; then
+@@ -15510,7 +15529,7 @@ rm -f core conftest.err conftest.$ac_obj
+ fi
+ 
+ cat >>confdefs.h <<_ACEOF
+-#define OFFLOAD_TARGETS "$offload_targets"
++#define OFFLOAD_PLUGINS "$offload_plugins"
+ _ACEOF
+ 
+  if test $PLUGIN_NVPTX = 1; then
+@@ -15570,6 +15589,19 @@ _ACEOF
+ fi
+ done
+ 
++for ac_func in aligned_alloc posix_memalign memalign _aligned_malloc
++do :
++  as_ac_var=`$as_echo "ac_cv_func_$ac_func" | $as_tr_sh`
++ac_fn_c_check_func "$LINENO" "$ac_func" "$as_ac_var"
++eval as_val=\$$as_ac_var
++   if test "x$as_val" = x""yes; then :
++  cat >>confdefs.h <<_ACEOF
++#define `$as_echo "HAVE_$ac_func" | $as_tr_cpp` 1
++_ACEOF
++
++fi
++done
++
+ 
+ # Check for broken semaphore implementation on darwin.
+ # sem_init returns: sem_init error: Function not implemented.
+@@ -15784,6 +15816,72 @@ fi
+ 
+ fi
+ 
++# Check for uname.
++cat confdefs.h - <<_ACEOF >conftest.$ac_ext
++/* end confdefs.h.  */
++#include <string.h>
++   #include <stdlib.h>
++   #include <sys/utsname.h>
++int
++main ()
++{
++struct utsname buf;
++   volatile size_t len = 0;
++   if (!uname (&buf))
++     len = strlen (buf.nodename);
++  ;
++  return 0;
++}
++_ACEOF
++if ac_fn_c_try_compile "$LINENO"; then :
++
++$as_echo "#define HAVE_UNAME 1" >>confdefs.h
++
++fi
++rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
++
++# Check for gethostname.
++cat confdefs.h - <<_ACEOF >conftest.$ac_ext
++/* end confdefs.h.  */
++#include <unistd.h>
++int
++main ()
++{
++
++   char buf[256];
++   if (gethostname (buf, sizeof (buf) - 1) == 0)
++     buf[255] = '\0';
++
++  ;
++  return 0;
++}
++_ACEOF
++if ac_fn_c_try_compile "$LINENO"; then :
++
++$as_echo "#define HAVE_GETHOSTNAME 1" >>confdefs.h
++
++fi
++rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
++
++# Check for getpid.
++cat confdefs.h - <<_ACEOF >conftest.$ac_ext
++/* end confdefs.h.  */
++#include <unistd.h>
++int
++main ()
++{
++int pid = getpid ();
++  ;
++  return 0;
++}
++_ACEOF
++if ac_fn_c_try_compile "$LINENO"; then :
++
++$as_echo "#define HAVE_GETPID 1" >>confdefs.h
++
++fi
++rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
++
+ # See if we support thread-local storage.
+ 
+ 
+--- libgomp/Makefile.am.jj	2018-04-25 09:40:31.926655587 +0200
++++ libgomp/Makefile.am	2019-05-07 19:59:03.683989317 +0200
+@@ -63,12 +63,13 @@ libgomp_la_SOURCES = alloc.c atomic.c ba
+ 	parallel.c sections.c single.c task.c team.c work.c lock.c mutex.c \
+ 	proc.c sem.c bar.c ptrlock.c time.c fortran.c affinity.c target.c \
+ 	splay-tree.c libgomp-plugin.c oacc-parallel.c oacc-host.c oacc-init.c \
+-	oacc-mem.c oacc-async.c oacc-plugin.c oacc-cuda.c priority_queue.c
++	oacc-mem.c oacc-async.c oacc-plugin.c oacc-cuda.c priority_queue.c \
++	affinity-fmt.c teams.c
+ 
+ include $(top_srcdir)/plugin/Makefrag.am
+ 
+ if USE_FORTRAN
+-libgomp_la_SOURCES += openacc.f90
++libgomp_la_SOURCES += openacc2.f90
+ endif
+ 
+ nodist_noinst_HEADERS = libgomp_f.h
+@@ -87,8 +88,6 @@ omp_lib_kinds.mod: omp_lib.mod
+ 	:
+ openacc_kinds.mod: openacc.mod
+ 	:
+-openacc.mod: openacc.lo
+-	:
+ %.mod: %.f90
+ 	$(FC) $(FCFLAGS) -fsyntax-only $<
+ fortran.lo: libgomp_f.h
+--- libgomp/oacc-mem.c.jj	2018-04-25 09:40:31.924655586 +0200
++++ libgomp/oacc-mem.c	2019-05-07 18:46:36.530109672 +0200
+@@ -153,8 +153,9 @@ acc_free (void *d)
+     gomp_fatal ("error in freeing device memory in %s", __FUNCTION__);
+ }
+ 
+-void
+-acc_memcpy_to_device (void *d, void *h, size_t s)
++static void
++memcpy_tofrom_device (bool from, void *d, void *h, size_t s, int async,
++		      const char *libfnname)
+ {
+   /* No need to call lazy open here, as the device pointer must have
+      been obtained from a routine that did that.  */
+@@ -164,31 +165,49 @@ acc_memcpy_to_device (void *d, void *h,
+ 
+   if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
+     {
+-      memmove (d, h, s);
++      if (from)
++	memmove (h, d, s);
++      else
++	memmove (d, h, s);
+       return;
+     }
+ 
+-  if (!thr->dev->host2dev_func (thr->dev->target_id, d, h, s))
+-    gomp_fatal ("error in %s", __FUNCTION__);
++  if (async > acc_async_sync)
++    thr->dev->openacc.async_set_async_func (async);
++
++  bool ret = (from
++	      ? thr->dev->dev2host_func (thr->dev->target_id, h, d, s)
++	      : thr->dev->host2dev_func (thr->dev->target_id, d, h, s));
++
++  if (async > acc_async_sync)
++    thr->dev->openacc.async_set_async_func (acc_async_sync);
++
++  if (!ret)
++    gomp_fatal ("error in %s", libfnname);
+ }
+ 
+ void
+-acc_memcpy_from_device (void *h, void *d, size_t s)
++acc_memcpy_to_device (void *d, void *h, size_t s)
+ {
+-  /* No need to call lazy open here, as the device pointer must have
+-     been obtained from a routine that did that.  */
+-  struct goacc_thread *thr = goacc_thread ();
++  memcpy_tofrom_device (false, d, h, s, acc_async_sync, __FUNCTION__);
++}
+ 
+-  assert (thr && thr->dev);
++void
++acc_memcpy_to_device_async (void *d, void *h, size_t s, int async)
++{
++  memcpy_tofrom_device (false, d, h, s, async, __FUNCTION__);
++}
+ 
+-  if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
+-    {
+-      memmove (h, d, s);
+-      return;
+-    }
++void
++acc_memcpy_from_device (void *h, void *d, size_t s)
++{
++  memcpy_tofrom_device (true, d, h, s, acc_async_sync, __FUNCTION__);
++}
+ 
+-  if (!thr->dev->dev2host_func (thr->dev->target_id, h, d, s))
+-    gomp_fatal ("error in %s", __FUNCTION__);
++void
++acc_memcpy_from_device_async (void *h, void *d, size_t s, int async)
++{
++  memcpy_tofrom_device (true, d, h, s, async, __FUNCTION__);
+ }
+ 
+ /* Return the device pointer that corresponds to host data H.  Or NULL
+@@ -347,6 +366,7 @@ acc_map_data (void *h, void *d, size_t s
+ 
+       tgt = gomp_map_vars (acc_dev, mapnum, &hostaddrs, &devaddrs, &sizes,
+ 			   &kinds, true, GOMP_MAP_VARS_OPENACC);
++      tgt->list[0].key->refcount = REFCOUNT_INFINITY;
+     }
+ 
+   gomp_mutex_lock (&acc_dev->lock);
+@@ -389,6 +409,9 @@ acc_unmap_data (void *h)
+ 		  (void *) n->host_start, (int) host_size, (void *) h);
+     }
+ 
++  /* Mark for removal.  */
++  n->refcount = 1;
++
+   t = n->tgt;
+ 
+   if (t->refcount == 2)
+@@ -424,7 +447,7 @@ acc_unmap_data (void *h)
+ #define FLAG_COPY (1 << 2)
+ 
+ static void *
+-present_create_copy (unsigned f, void *h, size_t s)
++present_create_copy (unsigned f, void *h, size_t s, int async)
+ {
+   void *d;
+   splay_tree_key n;
+@@ -460,6 +483,11 @@ present_create_copy (unsigned f, void *h
+ 	  gomp_fatal ("[%p,+%d] not mapped", (void *)h, (int)s);
+ 	}
+ 
++      if (n->refcount != REFCOUNT_INFINITY)
++	{
++	  n->refcount++;
++	  n->dynamic_refcount++;
++	}
+       gomp_mutex_unlock (&acc_dev->lock);
+     }
+   else if (!(f & FLAG_CREATE))
+@@ -481,8 +509,16 @@ present_create_copy (unsigned f, void *h
+ 
+       gomp_mutex_unlock (&acc_dev->lock);
+ 
++      if (async > acc_async_sync)
++	acc_dev->openacc.async_set_async_func (async);
++
+       tgt = gomp_map_vars (acc_dev, mapnum, &hostaddrs, NULL, &s, &kinds, true,
+ 			   GOMP_MAP_VARS_OPENACC);
++      /* Initialize dynamic refcount.  */
++      tgt->list[0].key->dynamic_refcount = 1;
++
++      if (async > acc_async_sync)
++	acc_dev->openacc.async_set_async_func (acc_async_sync);
+ 
+       gomp_mutex_lock (&acc_dev->lock);
+ 
+@@ -499,53 +535,71 @@ present_create_copy (unsigned f, void *h
+ void *
+ acc_create (void *h, size_t s)
+ {
+-  return present_create_copy (FLAG_CREATE, h, s);
++  return present_create_copy (FLAG_PRESENT | FLAG_CREATE, h, s, acc_async_sync);
+ }
+ 
+-void *
+-acc_copyin (void *h, size_t s)
++void
++acc_create_async (void *h, size_t s, int async)
+ {
+-  return present_create_copy (FLAG_CREATE | FLAG_COPY, h, s);
++  present_create_copy (FLAG_PRESENT | FLAG_CREATE, h, s, async);
+ }
+ 
++/* acc_present_or_create used to be what acc_create is now.  */
++/* acc_pcreate is acc_present_or_create by a different name.  */
++#ifdef HAVE_ATTRIBUTE_ALIAS
++strong_alias (acc_create, acc_present_or_create)
++strong_alias (acc_create, acc_pcreate)
++#else
+ void *
+ acc_present_or_create (void *h, size_t s)
+ {
+-  return present_create_copy (FLAG_PRESENT | FLAG_CREATE, h, s);
++  return acc_create (h, s);
+ }
+ 
+-/* acc_pcreate is acc_present_or_create by a different name.  */
+-#ifdef HAVE_ATTRIBUTE_ALIAS
+-strong_alias (acc_present_or_create, acc_pcreate)
+-#else
+ void *
+ acc_pcreate (void *h, size_t s)
+ {
+-  return acc_present_or_create (h, s);
++  return acc_create (h, s);
+ }
+ #endif
+ 
+ void *
+-acc_present_or_copyin (void *h, size_t s)
++acc_copyin (void *h, size_t s)
++{
++  return present_create_copy (FLAG_PRESENT | FLAG_CREATE | FLAG_COPY, h, s,
++			      acc_async_sync);
++}
++
++void
++acc_copyin_async (void *h, size_t s, int async)
+ {
+-  return present_create_copy (FLAG_PRESENT | FLAG_CREATE | FLAG_COPY, h, s);
++  present_create_copy (FLAG_PRESENT | FLAG_CREATE | FLAG_COPY, h, s, async);
+ }
+ 
++/* acc_present_or_copyin used to be what acc_copyin is now.  */
+ /* acc_pcopyin is acc_present_or_copyin by a different name.  */
+ #ifdef HAVE_ATTRIBUTE_ALIAS
+-strong_alias (acc_present_or_copyin, acc_pcopyin)
++strong_alias (acc_copyin, acc_present_or_copyin)
++strong_alias (acc_copyin, acc_pcopyin)
+ #else
+ void *
++acc_present_or_copyin (void *h, size_t s)
++{
++  return acc_copyin (h, s);
++}
++
++void *
+ acc_pcopyin (void *h, size_t s)
+ {
+-  return acc_present_or_copyin (h, s);
++  return acc_copyin (h, s);
+ }
+ #endif
+ 
+-#define FLAG_COPYOUT (1 << 0)
++#define FLAG_COPYOUT  (1 << 0)
++#define FLAG_FINALIZE (1 << 1)
+ 
+ static void
+-delete_copyout (unsigned f, void *h, size_t s, const char *libfnname)
++delete_copyout (unsigned f, void *h, size_t s, int async, const char *libfnname)
+ {
+   size_t host_size;
+   splay_tree_key n;
+@@ -581,31 +635,111 @@ delete_copyout (unsigned f, void *h, siz
+ 		  (void *) n->host_start, (int) host_size, (void *) h, (int) s);
+     }
+ 
+-  gomp_mutex_unlock (&acc_dev->lock);
++  if (n->refcount == REFCOUNT_INFINITY)
++    {
++      n->refcount = 0;
++      n->dynamic_refcount = 0;
++    }
++  if (n->refcount < n->dynamic_refcount)
++    {
++      gomp_mutex_unlock (&acc_dev->lock);
++      gomp_fatal ("Dynamic reference counting assert fail\n");
++    }
+ 
+-  if (f & FLAG_COPYOUT)
+-    acc_dev->dev2host_func (acc_dev->target_id, h, d, s);
++  if (f & FLAG_FINALIZE)
++    {
++      n->refcount -= n->dynamic_refcount;
++      n->dynamic_refcount = 0;
++    }
++  else if (n->dynamic_refcount)
++    {
++      n->dynamic_refcount--;
++      n->refcount--;
++    }
++
++  if (n->refcount == 0)
++    {
++      if (n->tgt->refcount == 2)
++	{
++	  struct target_mem_desc *tp, *t;
++	  for (tp = NULL, t = acc_dev->openacc.data_environ; t != NULL;
++	       tp = t, t = t->prev)
++	    if (n->tgt == t)
++	      {
++		if (tp)
++		  tp->prev = t->prev;
++		else
++		  acc_dev->openacc.data_environ = t->prev;
++		break;
++	      }
++	}
++
++      if (f & FLAG_COPYOUT)
++	{
++	  if (async > acc_async_sync)
++	    acc_dev->openacc.async_set_async_func (async);
++	  acc_dev->dev2host_func (acc_dev->target_id, h, d, s);
++	  if (async > acc_async_sync)
++	    acc_dev->openacc.async_set_async_func (acc_async_sync);
++	}
+ 
+-  acc_unmap_data (h);
++      gomp_remove_var (acc_dev, n);
++    }
+ 
+-  if (!acc_dev->free_func (acc_dev->target_id, d))
+-    gomp_fatal ("error in freeing device memory in %s", libfnname);
++  gomp_mutex_unlock (&acc_dev->lock);
+ }
+ 
+ void
+ acc_delete (void *h , size_t s)
+ {
+-  delete_copyout (0, h, s, __FUNCTION__);
++  delete_copyout (0, h, s, acc_async_sync, __FUNCTION__);
++}
++
++void
++acc_delete_async (void *h , size_t s, int async)
++{
++  delete_copyout (0, h, s, async, __FUNCTION__);
++}
++
++void
++acc_delete_finalize (void *h , size_t s)
++{
++  delete_copyout (FLAG_FINALIZE, h, s, acc_async_sync, __FUNCTION__);
++}
++
++void
++acc_delete_finalize_async (void *h , size_t s, int async)
++{
++  delete_copyout (FLAG_FINALIZE, h, s, async, __FUNCTION__);
+ }
+ 
+ void
+ acc_copyout (void *h, size_t s)
+ {
+-  delete_copyout (FLAG_COPYOUT, h, s, __FUNCTION__);
++  delete_copyout (FLAG_COPYOUT, h, s, acc_async_sync, __FUNCTION__);
++}
++
++void
++acc_copyout_async (void *h, size_t s, int async)
++{
++  delete_copyout (FLAG_COPYOUT, h, s, async, __FUNCTION__);
++}
++
++void
++acc_copyout_finalize (void *h, size_t s)
++{
++  delete_copyout (FLAG_COPYOUT | FLAG_FINALIZE, h, s, acc_async_sync,
++		  __FUNCTION__);
++}
++
++void
++acc_copyout_finalize_async (void *h, size_t s, int async)
++{
++  delete_copyout (FLAG_COPYOUT | FLAG_FINALIZE, h, s, async, __FUNCTION__);
+ }
+ 
+ static void
+-update_dev_host (int is_dev, void *h, size_t s)
++update_dev_host (int is_dev, void *h, size_t s, int async)
+ {
+   splay_tree_key n;
+   void *d;
+@@ -631,24 +765,42 @@ update_dev_host (int is_dev, void *h, si
+   d = (void *) (n->tgt->tgt_start + n->tgt_offset
+ 		+ (uintptr_t) h - n->host_start);
+ 
++  if (async > acc_async_sync)
++    acc_dev->openacc.async_set_async_func (async);
++
+   if (is_dev)
+     acc_dev->host2dev_func (acc_dev->target_id, d, h, s);
+   else
+     acc_dev->dev2host_func (acc_dev->target_id, h, d, s);
+ 
++  if (async > acc_async_sync)
++    acc_dev->openacc.async_set_async_func (acc_async_sync);
++
+   gomp_mutex_unlock (&acc_dev->lock);
+ }
+ 
+ void
+ acc_update_device (void *h, size_t s)
+ {
+-  update_dev_host (1, h, s);
++  update_dev_host (1, h, s, acc_async_sync);
++}
++
++void
++acc_update_device_async (void *h, size_t s, int async)
++{
++  update_dev_host (1, h, s, async);
+ }
+ 
+ void
+ acc_update_self (void *h, size_t s)
+ {
+-  update_dev_host (0, h, s);
++  update_dev_host (0, h, s, acc_async_sync);
++}
++
++void
++acc_update_self_async (void *h, size_t s, int async)
++{
++  update_dev_host (0, h, s, async);
+ }
+ 
+ void
+@@ -659,11 +811,37 @@ gomp_acc_insert_pointer (size_t mapnum,
+   struct goacc_thread *thr = goacc_thread ();
+   struct gomp_device_descr *acc_dev = thr->dev;
+ 
++  if (acc_is_present (*hostaddrs, *sizes))
++    {
++      splay_tree_key n;
++      gomp_mutex_lock (&acc_dev->lock);
++      n = lookup_host (acc_dev, *hostaddrs, *sizes);
++      gomp_mutex_unlock (&acc_dev->lock);
++
++      tgt = n->tgt;
++      for (size_t i = 0; i < tgt->list_count; i++)
++	if (tgt->list[i].key == n)
++	  {
++	    for (size_t j = 0; j < mapnum; j++)
++	      if (i + j < tgt->list_count && tgt->list[i + j].key)
++		{
++		  tgt->list[i + j].key->refcount++;
++		  tgt->list[i + j].key->dynamic_refcount++;
++		}
++	    return;
++	  }
++      /* Should not reach here.  */
++      gomp_fatal ("Dynamic refcount incrementing failed for pointer/pset");
++    }
++
+   gomp_debug (0, "  %s: prepare mappings\n", __FUNCTION__);
+   tgt = gomp_map_vars (acc_dev, mapnum, hostaddrs,
+ 		       NULL, sizes, kinds, true, GOMP_MAP_VARS_OPENACC);
+   gomp_debug (0, "  %s: mappings prepared\n", __FUNCTION__);
+ 
++  /* Initialize dynamic refcount.  */
++  tgt->list[0].key->dynamic_refcount = 1;
++
+   gomp_mutex_lock (&acc_dev->lock);
+   tgt->prev = acc_dev->openacc.data_environ;
+   acc_dev->openacc.data_environ = tgt;
+@@ -671,7 +849,8 @@ gomp_acc_insert_pointer (size_t mapnum,
+ }
+ 
+ void
+-gomp_acc_remove_pointer (void *h, bool force_copyfrom, int async, int mapnum)
++gomp_acc_remove_pointer (void *h, size_t s, bool force_copyfrom, int async,
++			 int finalize, int mapnum)
+ {
+   struct goacc_thread *thr = goacc_thread ();
+   struct gomp_device_descr *acc_dev = thr->dev;
+@@ -679,6 +858,9 @@ gomp_acc_remove_pointer (void *h, bool f
+   struct target_mem_desc *t;
+   int minrefs = (mapnum == 1) ? 2 : 3;
+ 
++  if (!acc_is_present (h, s))
++    return;
++
+   gomp_mutex_lock (&acc_dev->lock);
+ 
+   n = lookup_host (acc_dev, h, 1);
+@@ -693,40 +875,65 @@ gomp_acc_remove_pointer (void *h, bool f
+ 
+   t = n->tgt;
+ 
+-  struct target_mem_desc *tp;
++  if (n->refcount < n->dynamic_refcount)
++    {
++      gomp_mutex_unlock (&acc_dev->lock);
++      gomp_fatal ("Dynamic reference counting assert fail\n");
++    }
+ 
+-  if (t->refcount == minrefs)
++  if (finalize)
+     {
+-      /* This is the last reference, so pull the descriptor off the
+-	 chain. This avoids gomp_unmap_vars via gomp_unmap_tgt from
+-	 freeing the device memory. */
+-      t->tgt_end = 0;
+-      t->to_free = 0;
++      n->refcount -= n->dynamic_refcount;
++      n->dynamic_refcount = 0;
++    }
++  else if (n->dynamic_refcount)
++    {
++      n->dynamic_refcount--;
++      n->refcount--;
++    }
+ 
+-      for (tp = NULL, t = acc_dev->openacc.data_environ; t != NULL;
+-	   tp = t, t = t->prev)
++  gomp_mutex_unlock (&acc_dev->lock);
++
++  if (n->refcount == 0)
++    {
++      if (t->refcount == minrefs)
+ 	{
+-	  if (n->tgt == t)
++	  /* This is the last reference, so pull the descriptor off the
++	     chain. This prevents gomp_unmap_vars via gomp_unmap_tgt from
++	     freeing the device memory. */
++	  struct target_mem_desc *tp;
++	  for (tp = NULL, t = acc_dev->openacc.data_environ; t != NULL;
++	       tp = t, t = t->prev)
+ 	    {
+-	      if (tp)
+-		tp->prev = t->prev;
+-	      else
+-		acc_dev->openacc.data_environ = t->prev;
+-	      break;
++	      if (n->tgt == t)
++		{
++		  if (tp)
++		    tp->prev = t->prev;
++		  else
++		    acc_dev->openacc.data_environ = t->prev;
++		  break;
++		}
+ 	    }
+ 	}
+-    }
+ 
+-  if (force_copyfrom)
+-    t->list[0].copy_from = 1;
++      /* Set refcount to 1 to allow gomp_unmap_vars to unmap it.  */
++      n->refcount = 1;
++      t->refcount = minrefs;
++      for (size_t i = 0; i < t->list_count; i++)
++	if (t->list[i].key == n)
++	  {
++	    t->list[i].copy_from = force_copyfrom ? 1 : 0;
++	    break;
++	  }
+ 
+-  gomp_mutex_unlock (&acc_dev->lock);
++      /* If running synchronously, unmap immediately.  */
++      if (async < acc_async_noval)
++	gomp_unmap_vars (t, true);
++      else
++	t->device_descr->openacc.register_async_cleanup_func (t, async);
++    }
+ 
+-  /* If running synchronously, unmap immediately.  */
+-  if (async < acc_async_noval)
+-    gomp_unmap_vars (t, true);
+-  else
+-    t->device_descr->openacc.register_async_cleanup_func (t, async);
++  gomp_mutex_unlock (&acc_dev->lock);
+ 
+   gomp_debug (0, "  %s: mappings restored\n", __FUNCTION__);
+ }
+--- libgomp/env.c.jj	2018-04-25 09:40:31.924655586 +0200
++++ libgomp/env.c	2019-05-07 18:46:36.482110438 +0200
+@@ -88,8 +88,12 @@ void **gomp_places_list;
+ unsigned long gomp_places_list_len;
+ int gomp_debug_var;
+ unsigned int gomp_num_teams_var;
++bool gomp_display_affinity_var;
++char *gomp_affinity_format_var = "level %L thread %i affinity %A";
++size_t gomp_affinity_format_len;
+ char *goacc_device_type;
+ int goacc_device_num;
++int goacc_default_dims[GOMP_DIM_MAX];
+ 
+ #ifndef LIBGOMP_OFFLOADED_ONLY
+ 
+@@ -100,6 +104,7 @@ parse_schedule (void)
+ {
+   char *env, *end;
+   unsigned long value;
++  int monotonic = 0;
+ 
+   env = getenv ("OMP_SCHEDULE");
+   if (env == NULL)
+@@ -107,6 +112,26 @@ parse_schedule (void)
+ 
+   while (isspace ((unsigned char) *env))
+     ++env;
++  if (strncasecmp (env, "monotonic", 9) == 0)
++    {
++      monotonic = 1;
++      env += 9;
++    }
++  else if (strncasecmp (env, "nonmonotonic", 12) == 0)
++    {
++      monotonic = -1;
++      env += 12;
++    }
++  if (monotonic)
++    {
++      while (isspace ((unsigned char) *env))
++	++env;
++      if (*env != ':')
++	goto unknown;
++      ++env;
++      while (isspace ((unsigned char) *env))
++	++env;
++    }
+   if (strncasecmp (env, "static", 6) == 0)
+     {
+       gomp_global_icv.run_sched_var = GFS_STATIC;
+@@ -130,12 +155,16 @@ parse_schedule (void)
+   else
+     goto unknown;
+ 
++  if (monotonic == 1
++      || (monotonic == 0 && gomp_global_icv.run_sched_var == GFS_STATIC))
++    gomp_global_icv.run_sched_var |= GFS_MONOTONIC;
++
+   while (isspace ((unsigned char) *env))
+     ++env;
+   if (*env == '\0')
+     {
+       gomp_global_icv.run_sched_chunk_size
+-	= gomp_global_icv.run_sched_var != GFS_STATIC;
++	= (gomp_global_icv.run_sched_var & ~GFS_MONOTONIC) != GFS_STATIC;
+       return;
+     }
+   if (*env++ != ',')
+@@ -158,7 +187,8 @@ parse_schedule (void)
+   if ((int)value != value)
+     goto invalid;
+ 
+-  if (value == 0 && gomp_global_icv.run_sched_var != GFS_STATIC)
++  if (value == 0
++      && (gomp_global_icv.run_sched_var & ~GFS_MONOTONIC) != GFS_STATIC)
+     value = 1;
+   gomp_global_icv.run_sched_chunk_size = value;
+   return;
+@@ -1066,6 +1096,36 @@ parse_acc_device_type (void)
+ }
+ 
+ static void
++parse_gomp_openacc_dim (void)
++{
++  /* The syntax is the same as for the -fopenacc-dim compilation option.  */
++  const char *var_name = "GOMP_OPENACC_DIM";
++  const char *env_var = getenv (var_name);
++  if (!env_var)
++    return;
++
++  const char *pos = env_var;
++  int i;
++  for (i = 0; *pos && i != GOMP_DIM_MAX; i++)
++    {
++      if (i && *pos++ != ':')
++	break;
++
++      if (*pos == ':')
++	continue;
++
++      const char *eptr;
++      errno = 0;
++      long val = strtol (pos, (char **)&eptr, 10);
++      if (errno || val < 0 || (unsigned)val != val)
++	break;
++
++      goacc_default_dims[i] = (int)val;
++      pos = eptr;
++    }
++}
++
++static void
+ handle_omp_display_env (unsigned long stacksize, int wait_policy)
+ {
+   const char *env;
+@@ -1119,19 +1179,34 @@ handle_omp_display_env (unsigned long st
+   fputs ("'\n", stderr);
+ 
+   fprintf (stderr, "  OMP_SCHEDULE = '");
+-  switch (gomp_global_icv.run_sched_var)
++  if ((gomp_global_icv.run_sched_var & GFS_MONOTONIC))
++    {
++      if (gomp_global_icv.run_sched_var != (GFS_MONOTONIC | GFS_STATIC))
++	fputs ("MONOTONIC:", stderr);
++    }
++  else if (gomp_global_icv.run_sched_var == GFS_STATIC)
++    fputs ("NONMONOTONIC:", stderr);
++  switch (gomp_global_icv.run_sched_var & ~GFS_MONOTONIC)
+     {
+     case GFS_RUNTIME:
+       fputs ("RUNTIME", stderr);
++      if (gomp_global_icv.run_sched_chunk_size != 1)
++	fprintf (stderr, ",%d", gomp_global_icv.run_sched_chunk_size);
+       break;
+     case GFS_STATIC:
+       fputs ("STATIC", stderr);
++      if (gomp_global_icv.run_sched_chunk_size != 0)
++	fprintf (stderr, ",%d", gomp_global_icv.run_sched_chunk_size);
+       break;
+     case GFS_DYNAMIC:
+       fputs ("DYNAMIC", stderr);
++      if (gomp_global_icv.run_sched_chunk_size != 1)
++	fprintf (stderr, ",%d", gomp_global_icv.run_sched_chunk_size);
+       break;
+     case GFS_GUIDED:
+       fputs ("GUIDED", stderr);
++      if (gomp_global_icv.run_sched_chunk_size != 1)
++	fprintf (stderr, ",%d", gomp_global_icv.run_sched_chunk_size);
+       break;
+     case GFS_AUTO:
+       fputs ("AUTO", stderr);
+@@ -1197,6 +1272,10 @@ handle_omp_display_env (unsigned long st
+ 	   gomp_global_icv.default_device_var);
+   fprintf (stderr, "  OMP_MAX_TASK_PRIORITY = '%d'\n",
+ 	   gomp_max_task_priority_var);
++  fprintf (stderr, "  OMP_DISPLAY_AFFINITY = '%s'\n",
++	   gomp_display_affinity_var ? "TRUE" : "FALSE");
++  fprintf (stderr, "  OMP_AFFINITY_FORMAT = '%s'\n",
++	   gomp_affinity_format_var);
+ 
+   if (verbose)
+     {
+@@ -1228,6 +1307,7 @@ initialize_env (void)
+   parse_boolean ("OMP_DYNAMIC", &gomp_global_icv.dyn_var);
+   parse_boolean ("OMP_NESTED", &gomp_global_icv.nest_var);
+   parse_boolean ("OMP_CANCELLATION", &gomp_cancel_var);
++  parse_boolean ("OMP_DISPLAY_AFFINITY", &gomp_display_affinity_var);
+   parse_int ("OMP_DEFAULT_DEVICE", &gomp_global_icv.default_device_var, true);
+   parse_int ("OMP_MAX_TASK_PRIORITY", &gomp_max_task_priority_var, true);
+   parse_unsigned_long ("OMP_MAX_ACTIVE_LEVELS", &gomp_max_active_levels_var,
+@@ -1277,6 +1357,13 @@ initialize_env (void)
+     }
+   if (gomp_global_icv.bind_var != omp_proc_bind_false)
+     gomp_init_affinity ();
++
++  {
++    const char *env = getenv ("OMP_AFFINITY_FORMAT");
++    if (env != NULL)
++      gomp_set_affinity_format (env, strlen (env));
++  }
++
+   wait_policy = parse_wait_policy ();
+   if (!parse_spincount ("GOMP_SPINCOUNT", &gomp_spin_count_var))
+     {
+@@ -1302,7 +1389,6 @@ initialize_env (void)
+ 
+   /* Not strictly environment related, but ordering constructors is tricky.  */
+   pthread_attr_init (&gomp_thread_attr);
+-  pthread_attr_setdetachstate (&gomp_thread_attr, PTHREAD_CREATE_DETACHED);
+ 
+   if (parse_stacksize ("OMP_STACKSIZE", &stacksize)
+       || parse_stacksize ("GOMP_STACKSIZE", &stacksize)
+@@ -1336,6 +1422,7 @@ initialize_env (void)
+     goacc_device_num = 0;
+ 
+   parse_acc_device_type ();
++  parse_gomp_openacc_dim ();
+ 
+   goacc_runtime_initialize ();
+ }
+--- libgomp/fortran.c.jj	2018-04-25 09:40:31.913655581 +0200
++++ libgomp/fortran.c	2019-05-07 18:46:36.491110295 +0200
+@@ -28,6 +28,8 @@
+ #include "libgomp.h"
+ #include "libgomp_f.h"
+ #include <stdlib.h>
++#include <stdio.h>
++#include <string.h>
+ #include <limits.h>
+ 
+ #ifdef HAVE_ATTRIBUTE_ALIAS
+@@ -82,6 +84,8 @@ ialias_redirect (omp_get_team_num)
+ ialias_redirect (omp_is_initial_device)
+ ialias_redirect (omp_get_initial_device)
+ ialias_redirect (omp_get_max_task_priority)
++ialias_redirect (omp_pause_resource)
++ialias_redirect (omp_pause_resource_all)
+ #endif
+ 
+ #ifndef LIBGOMP_GNU_SYMBOL_VERSIONING
+@@ -368,7 +372,9 @@ omp_get_schedule_ (int32_t *kind, int32_
+   omp_sched_t k;
+   int cs;
+   omp_get_schedule (&k, &cs);
+-  *kind = k;
++  /* For now mask off GFS_MONOTONIC, because OpenMP 4.5 code will not
++     expect to see it.  */
++  *kind = k & ~GFS_MONOTONIC;
+   *chunk_size = cs;
+ }
+ 
+@@ -378,7 +384,8 @@ omp_get_schedule_8_ (int32_t *kind, int6
+   omp_sched_t k;
+   int cs;
+   omp_get_schedule (&k, &cs);
+-  *kind = k;
++  /* See above.  */
++  *kind = k & ~GFS_MONOTONIC;
+   *chunk_size = cs;
+ }
+ 
+@@ -576,3 +583,96 @@ omp_get_max_task_priority_ (void)
+ {
+   return omp_get_max_task_priority ();
+ }
++
++void
++omp_set_affinity_format_ (const char *format, size_t format_len)
++{
++  gomp_set_affinity_format (format, format_len);
++}
++
++/* Copy the affinity format into BUFFER (BUFFER_LEN bytes), truncated or
++   blank-padded to fit.  Returns the length of the untruncated format.  */
++int32_t
++omp_get_affinity_format_ (char *buffer, size_t buffer_len)
++{
++  const char *src = gomp_affinity_format_var;
++  size_t full = strlen (src);
++  size_t copied = full < buffer_len ? full : buffer_len;
++  if (buffer_len != 0)
++    {
++      memcpy (buffer, src, copied);
++      if (copied < buffer_len)
++	memset (buffer + copied, ' ', buffer_len - copied);
++    }
++  return full;
++}
++
++/* Print the calling thread's affinity using FORMAT, or the default format.  */
++void
++omp_display_affinity_ (const char *format, size_t format_len)
++{
++  char *fmt = NULL, fmt_buf[256], buf[512];
++  if (format_len)
++    {
++      /* FORMAT is length-delimited; make a NUL-terminated copy of it.  */
++      fmt = format_len < 256 ? fmt_buf : gomp_malloc (format_len + 1);
++      memcpy (fmt, format, format_len);
++      fmt[format_len] = '\0';
++    }
++  struct gomp_thread *thr = gomp_thread ();
++  const char *f = format_len ? fmt : gomp_affinity_format_var;
++  size_t ret = gomp_display_affinity (buf, sizeof buf, f, gomp_thread_self (),
++				     &thr->ts, thr->place);
++  if (ret < sizeof buf)
++    {
++      buf[ret] = '\n';
++      gomp_print_string (buf, ret + 1);
++    }
++  else
++    {
++      /* BUF was too small; redo the formatting into B and print B.  */
++      char *b = gomp_malloc (ret + 1);
++      gomp_display_affinity (b, ret + 1, f, gomp_thread_self (), &thr->ts,
++			     thr->place);
++      b[ret] = '\n';
++      gomp_print_string (b, ret + 1);
++      free (b);
++    }
++  if (fmt && fmt != fmt_buf)
++    free (fmt);
++}
++
++/* Write the calling thread's affinity to BUFFER, blank-padded.  */
++int32_t
++omp_capture_affinity_ (char *buffer, const char *format,
++		       size_t buffer_len, size_t format_len)
++{
++  char stack_copy[256], *c_format = NULL;
++  if (format_len != 0)
++    {
++      c_format = format_len >= 256 ? gomp_malloc (format_len + 1) : stack_copy;
++      memcpy (c_format, format, format_len);
++      c_format[format_len] = '\0';
++    }
++  struct gomp_thread *thr = gomp_thread ();
++  const char *spec = format_len != 0 ? c_format : gomp_affinity_format_var;
++  size_t n = gomp_display_affinity (buffer, buffer_len, spec,
++				   gomp_thread_self (), &thr->ts, thr->place);
++  if (c_format != stack_copy)
++    free (c_format);
++  if (n < buffer_len)
++    memset (buffer + n, ' ', buffer_len - n);
++  return n;
++}
++
++int32_t
++omp_pause_resource_ (const int32_t *kind, const int32_t *device_num)
++{
++  return omp_pause_resource (*kind, *device_num);
++}
++
++int32_t
++omp_pause_resource_all_ (const int32_t *kind)
++{
++  return omp_pause_resource_all (*kind);
++}
+--- libgomp/configure.tgt.jj	2018-04-25 09:40:31.925655587 +0200
++++ libgomp/configure.tgt	2019-05-07 18:46:36.479110486 +0200
+@@ -18,7 +18,7 @@ if test $gcc_cv_have_tls = yes ; then
+ 	;;
+ 
+     *-*-linux* | *-*-gnu*)
+-	XCFLAGS="${XCFLAGS} -ftls-model=initial-exec"
++	XCFLAGS="${XCFLAGS} -ftls-model=initial-exec -DUSING_INITIAL_EXEC_TLS"
+ 	;;
+ 
+     *-*-rtems*)
+--- libgomp/icv-device.c.jj	2018-04-25 09:40:31.925655587 +0200
++++ libgomp/icv-device.c	2019-05-07 18:46:36.513109943 +0200
+@@ -49,20 +49,6 @@ omp_get_num_devices (void)
+ }
+ 
+ int
+-omp_get_num_teams (void)
+-{
+-  /* Hardcoded to 1 on host, MIC, HSAIL?  Maybe variable on PTX.  */
+-  return 1;
+-}
+-
+-int
+-omp_get_team_num (void)
+-{
+-  /* Hardcoded to 0 on host, MIC, HSAIL?  Maybe variable on PTX.  */
+-  return 0;
+-}
+-
+-int
+ omp_is_initial_device (void)
+ {
+   /* Hardcoded to 1 on host, should be 0 on MIC, HSAIL, PTX.  */
+@@ -72,6 +58,4 @@ omp_is_initial_device (void)
+ ialias (omp_set_default_device)
+ ialias (omp_get_default_device)
+ ialias (omp_get_num_devices)
+-ialias (omp_get_num_teams)
+-ialias (omp_get_team_num)
+ ialias (omp_is_initial_device)
+--- libgomp/Makefile.in.jj	2018-04-25 09:40:31.320655306 +0200
++++ libgomp/Makefile.in	2019-05-07 20:00:01.082077522 +0200
+@@ -90,7 +90,7 @@ DIST_COMMON = $(top_srcdir)/plugin/Makef
+ 	$(srcdir)/libgomp.spec.in $(srcdir)/../depcomp
+ @PLUGIN_NVPTX_TRUE@am__append_1 = libgomp-plugin-nvptx.la
+ @PLUGIN_HSA_TRUE@am__append_2 = libgomp-plugin-hsa.la
+-@USE_FORTRAN_TRUE@am__append_3 = openacc.f90
++@USE_FORTRAN_TRUE@am__append_3 = openacc2.f90
+ subdir = .
+ ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
+ am__aclocal_m4_deps = $(top_srcdir)/../config/acx.m4 \
+@@ -172,7 +172,7 @@ libgomp_plugin_nvptx_la_LINK = $(LIBTOOL
+ @PLUGIN_NVPTX_TRUE@am_libgomp_plugin_nvptx_la_rpath = -rpath \
+ @PLUGIN_NVPTX_TRUE@	$(toolexeclibdir)
+ libgomp_la_LIBADD =
+-@USE_FORTRAN_TRUE@am__objects_1 = openacc.lo
++@USE_FORTRAN_TRUE@am__objects_1 = openacc2.lo
+ am_libgomp_la_OBJECTS = alloc.lo atomic.lo barrier.lo critical.lo \
+ 	env.lo error.lo icv.lo icv-device.lo iter.lo iter_ull.lo \
+ 	loop.lo loop_ull.lo ordered.lo parallel.lo sections.lo \
+@@ -180,7 +180,8 @@ am_libgomp_la_OBJECTS = alloc.lo atomic.
+ 	sem.lo bar.lo ptrlock.lo time.lo fortran.lo affinity.lo \
+ 	target.lo splay-tree.lo libgomp-plugin.lo oacc-parallel.lo \
+ 	oacc-host.lo oacc-init.lo oacc-mem.lo oacc-async.lo \
+-	oacc-plugin.lo oacc-cuda.lo priority_queue.lo $(am__objects_1)
++	oacc-plugin.lo oacc-cuda.lo priority_queue.lo affinity-fmt.lo \
++	teams.lo $(am__objects_1)
+ libgomp_la_OBJECTS = $(am_libgomp_la_OBJECTS)
+ DEFAULT_INCLUDES = -I.@am__isrc@
+ depcomp = $(SHELL) $(top_srcdir)/../depcomp
+@@ -380,6 +381,7 @@ mkdir_p = @mkdir_p@
+ multi_basedir = @multi_basedir@
+ offload_additional_lib_paths = @offload_additional_lib_paths@
+ offload_additional_options = @offload_additional_options@
++offload_plugins = @offload_plugins@
+ offload_targets = @offload_targets@
+ oldincludedir = @oldincludedir@
+ pdfdir = @pdfdir@
+@@ -436,7 +438,7 @@ libgomp_la_SOURCES = alloc.c atomic.c ba
+ 	affinity.c target.c splay-tree.c libgomp-plugin.c \
+ 	oacc-parallel.c oacc-host.c oacc-init.c oacc-mem.c \
+ 	oacc-async.c oacc-plugin.c oacc-cuda.c priority_queue.c \
+-	$(am__append_3)
++	affinity-fmt.c teams.c $(am__append_3)
+ 
+ # Nvidia PTX OpenACC plugin.
+ @PLUGIN_NVPTX_TRUE@libgomp_plugin_nvptx_version_info = -version-info $(libtool_VERSION)
+@@ -599,6 +601,7 @@ mostlyclean-compile:
+ distclean-compile:
+ 	-rm -f *.tab.c
+ 
++@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/affinity-fmt.Plo@am__quote@
+ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/affinity.Plo@am__quote@
+ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/alloc.Plo@am__quote@
+ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/atomic.Plo@am__quote@
+@@ -638,6 +641,7 @@ distclean-compile:
+ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/target.Plo@am__quote@
+ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/task.Plo@am__quote@
+ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/team.Plo@am__quote@
++@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/teams.Plo@am__quote@
+ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/time.Plo@am__quote@
+ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/work.Plo@am__quote@
+ 
+@@ -1292,8 +1296,6 @@ omp_lib_kinds.mod: omp_lib.mod
+ 	:
+ openacc_kinds.mod: openacc.mod
+ 	:
+-openacc.mod: openacc.lo
+-	:
+ %.mod: %.f90
+ 	$(FC) $(FCFLAGS) -fsyntax-only $<
+ fortran.lo: libgomp_f.h
+--- libgomp/plugin/cuda/cuda.h.jj	2018-04-25 09:40:31.914655581 +0200
++++ libgomp/plugin/cuda/cuda.h	2019-05-07 18:46:36.533109624 +0200
+@@ -44,6 +44,7 @@ typedef void *CUevent;
+ typedef void *CUfunction;
+ typedef void *CUlinkState;
+ typedef void *CUmodule;
++typedef size_t (*CUoccupancyB2DSize)(int);
+ typedef void *CUstream;
+ 
+ typedef enum {
+@@ -88,6 +89,7 @@ typedef enum {
+   CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES = 4,
+   CU_JIT_ERROR_LOG_BUFFER = 5,
+   CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES = 6,
++  CU_JIT_OPTIMIZATION_LEVEL = 7,
+   CU_JIT_LOG_VERBOSE = 12
+ } CUjit_option;
+ 
+@@ -169,6 +171,8 @@ CUresult cuModuleGetGlobal (CUdeviceptr
+ CUresult cuModuleLoad (CUmodule *, const char *);
+ CUresult cuModuleLoadData (CUmodule *, const void *);
+ CUresult cuModuleUnload (CUmodule);
++CUresult cuOccupancyMaxPotentialBlockSize(int *, int *, CUfunction,
++					  CUoccupancyB2DSize, size_t, int);
+ CUresult cuStreamCreate (CUstream *, unsigned);
+ #define cuStreamDestroy cuStreamDestroy_v2
+ CUresult cuStreamDestroy (CUstream);
+--- libgomp/plugin/cuda-lib.def.jj	2019-05-07 18:46:36.533109624 +0200
++++ libgomp/plugin/cuda-lib.def	2019-05-07 18:46:36.533109624 +0200
+@@ -0,0 +1,49 @@
++CUDA_ONE_CALL (cuCtxCreate)
++CUDA_ONE_CALL (cuCtxDestroy)
++CUDA_ONE_CALL (cuCtxGetCurrent)
++CUDA_ONE_CALL (cuCtxGetDevice)
++CUDA_ONE_CALL (cuCtxPopCurrent)
++CUDA_ONE_CALL (cuCtxPushCurrent)
++CUDA_ONE_CALL (cuCtxSynchronize)
++CUDA_ONE_CALL (cuDeviceGet)
++CUDA_ONE_CALL (cuDeviceGetAttribute)
++CUDA_ONE_CALL (cuDeviceGetCount)
++CUDA_ONE_CALL (cuEventCreate)
++CUDA_ONE_CALL (cuEventDestroy)
++CUDA_ONE_CALL (cuEventElapsedTime)
++CUDA_ONE_CALL (cuEventQuery)
++CUDA_ONE_CALL (cuEventRecord)
++CUDA_ONE_CALL (cuEventSynchronize)
++CUDA_ONE_CALL (cuFuncGetAttribute)
++CUDA_ONE_CALL_MAYBE_NULL (cuGetErrorString)
++CUDA_ONE_CALL (cuInit)
++CUDA_ONE_CALL (cuLaunchKernel)
++CUDA_ONE_CALL (cuLinkAddData)
++CUDA_ONE_CALL_MAYBE_NULL (cuLinkAddData_v2)
++CUDA_ONE_CALL (cuLinkComplete)
++CUDA_ONE_CALL (cuLinkCreate)
++CUDA_ONE_CALL_MAYBE_NULL (cuLinkCreate_v2)
++CUDA_ONE_CALL (cuLinkDestroy)
++CUDA_ONE_CALL (cuMemAlloc)
++CUDA_ONE_CALL (cuMemAllocHost)
++CUDA_ONE_CALL (cuMemcpy)
++CUDA_ONE_CALL (cuMemcpyDtoDAsync)
++CUDA_ONE_CALL (cuMemcpyDtoH)
++CUDA_ONE_CALL (cuMemcpyDtoHAsync)
++CUDA_ONE_CALL (cuMemcpyHtoD)
++CUDA_ONE_CALL (cuMemcpyHtoDAsync)
++CUDA_ONE_CALL (cuMemFree)
++CUDA_ONE_CALL (cuMemFreeHost)
++CUDA_ONE_CALL (cuMemGetAddressRange)
++CUDA_ONE_CALL (cuMemHostGetDevicePointer)
++CUDA_ONE_CALL (cuModuleGetFunction)
++CUDA_ONE_CALL (cuModuleGetGlobal)
++CUDA_ONE_CALL (cuModuleLoad)
++CUDA_ONE_CALL (cuModuleLoadData)
++CUDA_ONE_CALL (cuModuleUnload)
++CUDA_ONE_CALL_MAYBE_NULL (cuOccupancyMaxPotentialBlockSize)
++CUDA_ONE_CALL (cuStreamCreate)
++CUDA_ONE_CALL (cuStreamDestroy)
++CUDA_ONE_CALL (cuStreamQuery)
++CUDA_ONE_CALL (cuStreamSynchronize)
++CUDA_ONE_CALL (cuStreamWaitEvent)
+--- libgomp/plugin/plugin-nvptx.c.jj	2018-04-25 09:40:31.915655582 +0200
++++ libgomp/plugin/plugin-nvptx.c	2019-05-07 18:46:36.535109592 +0200
+@@ -31,6 +31,7 @@
+    is not clear as to what that state might be.  Or how one might
+    propagate it from one thread to another.  */
+ 
++#define _GNU_SOURCE
+ #include "openacc.h"
+ #include "config.h"
+ #include "libgomp-plugin.h"
+@@ -48,60 +49,41 @@
+ #include <assert.h>
+ #include <errno.h>
+ 
++#if CUDA_VERSION < 6000
++extern CUresult cuGetErrorString (CUresult, const char **);
++#define CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_MULTIPROCESSOR 82
++#endif
++
++#if CUDA_VERSION >= 6050
++#undef cuLinkCreate
++#undef cuLinkAddData
++CUresult cuLinkAddData (CUlinkState, CUjitInputType, void *, size_t,
++			const char *, unsigned, CUjit_option *, void **);
++CUresult cuLinkCreate (unsigned, CUjit_option *, void **, CUlinkState *);
++#else
++typedef size_t (*CUoccupancyB2DSize)(int);
++CUresult cuLinkAddData_v2 (CUlinkState, CUjitInputType, void *, size_t,
++			   const char *, unsigned, CUjit_option *, void **);
++CUresult cuLinkCreate_v2 (unsigned, CUjit_option *, void **, CUlinkState *);
++CUresult cuOccupancyMaxPotentialBlockSize(int *, int *, CUfunction,
++					  CUoccupancyB2DSize, size_t, int);
++#endif
++
++#define DO_PRAGMA(x) _Pragma (#x)
++
+ #if PLUGIN_NVPTX_DYNAMIC
+ # include <dlfcn.h>
+ 
+-# define CUDA_CALLS \
+-CUDA_ONE_CALL (cuCtxCreate)		\
+-CUDA_ONE_CALL (cuCtxDestroy)		\
+-CUDA_ONE_CALL (cuCtxGetCurrent)		\
+-CUDA_ONE_CALL (cuCtxGetDevice)		\
+-CUDA_ONE_CALL (cuCtxPopCurrent)		\
+-CUDA_ONE_CALL (cuCtxPushCurrent)	\
+-CUDA_ONE_CALL (cuCtxSynchronize)	\
+-CUDA_ONE_CALL (cuDeviceGet)		\
+-CUDA_ONE_CALL (cuDeviceGetAttribute)	\
+-CUDA_ONE_CALL (cuDeviceGetCount)	\
+-CUDA_ONE_CALL (cuEventCreate)		\
+-CUDA_ONE_CALL (cuEventDestroy)		\
+-CUDA_ONE_CALL (cuEventElapsedTime)	\
+-CUDA_ONE_CALL (cuEventQuery)		\
+-CUDA_ONE_CALL (cuEventRecord)		\
+-CUDA_ONE_CALL (cuEventSynchronize)	\
+-CUDA_ONE_CALL (cuFuncGetAttribute)	\
+-CUDA_ONE_CALL (cuGetErrorString)	\
+-CUDA_ONE_CALL (cuInit)			\
+-CUDA_ONE_CALL (cuLaunchKernel)		\
+-CUDA_ONE_CALL (cuLinkAddData)		\
+-CUDA_ONE_CALL (cuLinkComplete)		\
+-CUDA_ONE_CALL (cuLinkCreate)		\
+-CUDA_ONE_CALL (cuLinkDestroy)		\
+-CUDA_ONE_CALL (cuMemAlloc)		\
+-CUDA_ONE_CALL (cuMemAllocHost)		\
+-CUDA_ONE_CALL (cuMemcpy)		\
+-CUDA_ONE_CALL (cuMemcpyDtoDAsync)	\
+-CUDA_ONE_CALL (cuMemcpyDtoH)		\
+-CUDA_ONE_CALL (cuMemcpyDtoHAsync)	\
+-CUDA_ONE_CALL (cuMemcpyHtoD)		\
+-CUDA_ONE_CALL (cuMemcpyHtoDAsync)	\
+-CUDA_ONE_CALL (cuMemFree)		\
+-CUDA_ONE_CALL (cuMemFreeHost)		\
+-CUDA_ONE_CALL (cuMemGetAddressRange)	\
+-CUDA_ONE_CALL (cuMemHostGetDevicePointer)\
+-CUDA_ONE_CALL (cuModuleGetFunction)	\
+-CUDA_ONE_CALL (cuModuleGetGlobal)	\
+-CUDA_ONE_CALL (cuModuleLoad)		\
+-CUDA_ONE_CALL (cuModuleLoadData)	\
+-CUDA_ONE_CALL (cuModuleUnload)		\
+-CUDA_ONE_CALL (cuStreamCreate)		\
+-CUDA_ONE_CALL (cuStreamDestroy)		\
+-CUDA_ONE_CALL (cuStreamQuery)		\
+-CUDA_ONE_CALL (cuStreamSynchronize)	\
+-CUDA_ONE_CALL (cuStreamWaitEvent)
+-# define CUDA_ONE_CALL(call) \
+-  __typeof (call) *call;
+ struct cuda_lib_s {
+-  CUDA_CALLS
++
++# define CUDA_ONE_CALL(call)			\
++  __typeof (call) *call;
++# define CUDA_ONE_CALL_MAYBE_NULL(call)		\
++  CUDA_ONE_CALL (call)
++#include "cuda-lib.def"
++# undef CUDA_ONE_CALL
++# undef CUDA_ONE_CALL_MAYBE_NULL
++
+ } cuda_lib;
+ 
+ /* -1 if init_cuda_lib has not been called yet, false
+@@ -120,24 +102,41 @@ init_cuda_lib (void)
+   cuda_lib_inited = false;
+   if (h == NULL)
+     return false;
+-# undef CUDA_ONE_CALL
+-# define CUDA_ONE_CALL(call) CUDA_ONE_CALL_1 (call)
+-# define CUDA_ONE_CALL_1(call) \
++
++# define CUDA_ONE_CALL(call) CUDA_ONE_CALL_1 (call, false)
++# define CUDA_ONE_CALL_MAYBE_NULL(call) CUDA_ONE_CALL_1 (call, true)
++# define CUDA_ONE_CALL_1(call, allow_null)		\
+   cuda_lib.call = dlsym (h, #call);	\
+-  if (cuda_lib.call == NULL)		\
++  if (!allow_null && cuda_lib.call == NULL)		\
+     return false;
+-  CUDA_CALLS
++#include "cuda-lib.def"
++# undef CUDA_ONE_CALL
++# undef CUDA_ONE_CALL_1
++# undef CUDA_ONE_CALL_MAYBE_NULL
++
+   cuda_lib_inited = true;
+   return true;
+ }
+-# undef CUDA_ONE_CALL
+-# undef CUDA_ONE_CALL_1
+ # define CUDA_CALL_PREFIX cuda_lib.
+ #else
++
++# define CUDA_ONE_CALL(call)
++# define CUDA_ONE_CALL_MAYBE_NULL(call) DO_PRAGMA (weak call)
++#include "cuda-lib.def"
++#undef CUDA_ONE_CALL_MAYBE_NULL
++#undef CUDA_ONE_CALL
++
+ # define CUDA_CALL_PREFIX
+ # define init_cuda_lib() true
+ #endif
+ 
++#include "secure_getenv.h"
++
++#undef MIN
++#undef MAX
++#define MIN(X,Y) ((X) < (Y) ? (X) : (Y))
++#define MAX(X,Y) ((X) > (Y) ? (X) : (Y))
++
+ /* Convenience macros for the frequently used CUDA library call and
+    error handling sequence as well as CUDA library calls that
+    do the error checking themselves or don't do it at all.  */
+@@ -171,40 +170,42 @@ init_cuda_lib (void)
+ #define CUDA_CALL_NOCHECK(FN, ...)		\
+   CUDA_CALL_PREFIX FN (__VA_ARGS__)
+ 
++#define CUDA_CALL_EXISTS(FN)			\
++  CUDA_CALL_PREFIX FN
++
+ static const char *
+ cuda_error (CUresult r)
+ {
+-#if CUDA_VERSION < 7000
+-  /* Specified in documentation and present in library from at least
+-     5.5.  Not declared in header file prior to 7.0.  */
+-  extern CUresult cuGetErrorString (CUresult, const char **);
+-#endif
++  const char *fallback = "unknown cuda error";
+   const char *desc;
+ 
++  if (!CUDA_CALL_EXISTS (cuGetErrorString))
++    return fallback;
++
+   r = CUDA_CALL_NOCHECK (cuGetErrorString, r, &desc);
+-  if (r != CUDA_SUCCESS)
+-    desc = "unknown cuda error";
++  if (r == CUDA_SUCCESS)
++    return desc;
+ 
+-  return desc;
++  return fallback;
+ }
+ 
+ static unsigned int instantiated_devices = 0;
+ static pthread_mutex_t ptx_dev_lock = PTHREAD_MUTEX_INITIALIZER;
+ 
++struct cuda_map
++{
++  CUdeviceptr d;
++  size_t size;
++  bool active;
++  struct cuda_map *next;
++};
++
+ struct ptx_stream
+ {
+   CUstream stream;
+   pthread_t host_thread;
+   bool multithreaded;
+-
+-  CUdeviceptr d;
+-  void *h;
+-  void *h_begin;
+-  void *h_end;
+-  void *h_next;
+-  void *h_prev;
+-  void *h_tail;
+-
++  struct cuda_map *map;
+   struct ptx_stream *next;
+ };
+ 
+@@ -216,12 +217,64 @@ struct nvptx_thread
+   struct ptx_device *ptx_dev;
+ };
+ 
+-struct map
++static struct cuda_map *
++cuda_map_create (size_t size)
+ {
+-  int     async;
+-  size_t  size;
+-  char    mappings[0];
+-};
++  struct cuda_map *map = GOMP_PLUGIN_malloc (sizeof (struct cuda_map));
++
++  assert (map);
++
++  map->next = NULL;
++  map->size = size;
++  map->active = false;
++
++  CUDA_CALL_ERET (NULL, cuMemAlloc, &map->d, size);
++  assert (map->d);
++
++  return map;
++}
++
++/* Destroy MAP: free the host-side descriptor and, when MAP is not marked
++   active, the device memory it refers to.  */
++static void
++cuda_map_destroy (struct cuda_map *map)
++{
++  /* The device memory of an active map is intentionally left allocated.
++
++     Possible reasons for the map to be still active:
++     - the associated async kernel might still be running;
++     - the associated async kernel might have finished, but the event that
++       should trigger the map_pop has not been processed by event_gc;
++     - the associated sync kernel might have aborted.
++
++     The async cases could happen if the user specified an async region
++     without adding a corresponding wait that is guaranteed to be executed
++     (before returning from main, or in an atexit handler).  We do not want
++     to deallocate a device pointer that is still being used, so skip it.
++
++     In the sync case, the device pointer is no longer used, but
++     deallocating it using cuMemFree will not succeed, so skip it.
++
++     TODO: Handle this in a more constructive way, e.g. by waiting for
++     streams to finish before de-allocating them (PR88981), or by ensuring
++     the CUDA lib atexit handler is called before rather than after the
++     libgomp plugin atexit handler (PR83795).  */
++  if (!map->active)
++    CUDA_CALL_NOCHECK (cuMemFree, map->d);
++
++  free (map);
++}
++
++/* The following map_* routines manage the CUDA device memory that
++   contains the data mapping arguments for cuLaunchKernel.  Each
++   asynchronous PTX stream may have multiple pending kernel
++   invocations, which are launched in a FIFO order.  As such, the map
++   routines maintain a queue of cuLaunchKernel arguments.
++
++   Calls to map_push and map_pop must be guarded by ptx_event_lock.
++   Likewise, calls to map_init and map_fini are guarded by
++   ptx_dev_lock inside GOMP_OFFLOAD_init_device and
++   GOMP_OFFLOAD_fini_device, respectively.  */
+ 
+ static bool
+ map_init (struct ptx_stream *s)
+@@ -229,109 +282,83 @@ map_init (struct ptx_stream *s)
+   int size = getpagesize ();
+ 
+   assert (s);
+-  assert (!s->d);
+-  assert (!s->h);
+-
+-  CUDA_CALL (cuMemAllocHost, &s->h, size);
+-  CUDA_CALL (cuMemHostGetDevicePointer, &s->d, s->h, 0);
+ 
+-  assert (s->h);
++  s->map = cuda_map_create (size);
+ 
+-  s->h_begin = s->h;
+-  s->h_end = s->h_begin + size;
+-  s->h_next = s->h_prev = s->h_tail = s->h_begin;
+-
+-  assert (s->h_next);
+-  assert (s->h_end);
+   return true;
+ }
+ 
+ static bool
+ map_fini (struct ptx_stream *s)
+ {
+-  CUDA_CALL (cuMemFreeHost, s->h);
++  assert (s->map->next == NULL);
++
++  cuda_map_destroy (s->map);
++
+   return true;
+ }
+ 
+ static void
+ map_pop (struct ptx_stream *s)
+ {
+-  struct map *m;
++  struct cuda_map *next;
+ 
+   assert (s != NULL);
+-  assert (s->h_next);
+-  assert (s->h_prev);
+-  assert (s->h_tail);
+-
+-  m = s->h_tail;
+-
+-  s->h_tail += m->size;
+-
+-  if (s->h_tail >= s->h_end)
+-    s->h_tail = s->h_begin + (int) (s->h_tail - s->h_end);
+-
+-  if (s->h_next == s->h_tail)
+-    s->h_prev = s->h_next;
+ 
+-  assert (s->h_next >= s->h_begin);
+-  assert (s->h_tail >= s->h_begin);
+-  assert (s->h_prev >= s->h_begin);
++  if (s->map->next == NULL)
++    {
++      s->map->active = false;
++      return;
++    }
+ 
+-  assert (s->h_next <= s->h_end);
+-  assert (s->h_tail <= s->h_end);
+-  assert (s->h_prev <= s->h_end);
++  next = s->map->next;
++  cuda_map_destroy (s->map);
++  s->map = next;
+ }
+ 
+-static void
+-map_push (struct ptx_stream *s, int async, size_t size, void **h, void **d)
++static CUdeviceptr
++map_push (struct ptx_stream *s, size_t size)
+ {
+-  int left;
+-  int offset;
+-  struct map *m;
++  struct cuda_map *map = NULL;
++  struct cuda_map **t;
+ 
+-  assert (s != NULL);
+-
+-  left = s->h_end - s->h_next;
+-  size += sizeof (struct map);
+-
+-  assert (s->h_prev);
+-  assert (s->h_next);
++  assert (s);
++  assert (s->map);
+ 
+-  if (size >= left)
++  /* Select an element to push.  */
++  if (s->map->active)
++    map = cuda_map_create (size);
++  else
+     {
+-      m = s->h_prev;
+-      m->size += left;
+-      s->h_next = s->h_begin;
+-
+-      if (s->h_next + size > s->h_end)
+-	GOMP_PLUGIN_fatal ("unable to push map");
+-    }
+-
+-  assert (s->h_next);
+-
+-  m = s->h_next;
+-  m->async = async;
+-  m->size = size;
++      /* Pop the inactive front element.  */
++      struct cuda_map *pop = s->map;
++      s->map = pop->next;
++      pop->next = NULL;
+ 
+-  offset = (void *)&m->mappings[0] - s->h;
++      if (pop->size < size)
++	{
++	  cuda_map_destroy (pop);
+ 
+-  *d = (void *)(s->d + offset);
+-  *h = (void *)(s->h + offset);
++	  map = cuda_map_create (size);
++	}
++      else
++	map = pop;
++    }
+ 
+-  s->h_prev = s->h_next;
+-  s->h_next += size;
++  /* Check that the element is as expected.  */
++  assert (map->next == NULL);
++  assert (!map->active);
+ 
+-  assert (s->h_prev);
+-  assert (s->h_next);
++  /* Mark the element active.  */
++  map->active = true;
+ 
+-  assert (s->h_next >= s->h_begin);
+-  assert (s->h_tail >= s->h_begin);
+-  assert (s->h_prev >= s->h_begin);
+-  assert (s->h_next <= s->h_end);
+-  assert (s->h_tail <= s->h_end);
+-  assert (s->h_prev <= s->h_end);
++  /* Push the element to the back of the list.  */
++  for (t = &s->map; (*t) != NULL; t = &(*t)->next)
++    ;
++  assert (t != NULL && *t == NULL);
++  *t = map;
+ 
+-  return;
++  return map->d;
+ }
+ 
+ /* Target data function launch information.  */
+@@ -411,6 +438,10 @@ struct ptx_device
+   int num_sms;
+   int regs_per_block;
+   int regs_per_sm;
++  int warp_size;
++  int max_threads_per_block;
++  int max_threads_per_multiprocessor;
++  int default_dims[GOMP_DIM_MAX];
+ 
+   struct ptx_image_data *images;  /* Images loaded on device.  */
+   pthread_mutex_t image_lock;     /* Lock for above list.  */
+@@ -458,8 +489,6 @@ init_streams_for_device (struct ptx_devi
+   null_stream->stream = NULL;
+   null_stream->host_thread = pthread_self ();
+   null_stream->multithreaded = true;
+-  null_stream->d = (CUdeviceptr) NULL;
+-  null_stream->h = NULL;
+   if (!map_init (null_stream))
+     return false;
+ 
+@@ -594,8 +623,6 @@ select_stream_for_async (int async, pthr
+ 	  s->host_thread = thread;
+ 	  s->multithreaded = false;
+ 
+-	  s->d = (CUdeviceptr) NULL;
+-	  s->h = NULL;
+ 	  if (!map_init (s))
+ 	    {
+ 	      pthread_mutex_unlock (&ptx_dev->stream_lock);
+@@ -777,9 +804,11 @@ nvptx_open_device (int n)
+ 		  &pi, CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK, dev);
+   ptx_dev->regs_per_block = pi;
+ 
+-  /* CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_MULTIPROCESSOR = 82 is defined only
++  /* CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_MULTIPROCESSOR is defined only
+      in CUDA 6.0 and newer.  */
+-  r = CUDA_CALL_NOCHECK (cuDeviceGetAttribute, &pi, 82, dev);
++  r = CUDA_CALL_NOCHECK (cuDeviceGetAttribute, &pi,
++			 CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_MULTIPROCESSOR,
++			 dev);
+   /* Fallback: use limit of registers per block, which is usually equal.  */
+   if (r == CUDA_ERROR_INVALID_VALUE)
+     pi = ptx_dev->regs_per_block;
+@@ -797,12 +826,24 @@ nvptx_open_device (int n)
+       GOMP_PLUGIN_error ("Only warp size 32 is supported");
+       return NULL;
+     }
++  ptx_dev->warp_size = pi;
++
++  CUDA_CALL_ERET (NULL, cuDeviceGetAttribute, &pi,
++		  CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK, dev);
++  ptx_dev->max_threads_per_block = pi;
++
++  CUDA_CALL_ERET (NULL, cuDeviceGetAttribute, &pi,
++		  CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_MULTIPROCESSOR, dev);
++  ptx_dev->max_threads_per_multiprocessor = pi;
+ 
+   r = CUDA_CALL_NOCHECK (cuDeviceGetAttribute, &async_engines,
+ 			 CU_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT, dev);
+   if (r != CUDA_SUCCESS)
+     async_engines = 1;
+ 
++  for (int i = 0; i != GOMP_DIM_MAX; i++)
++    ptx_dev->default_dims[i] = 0;
++
+   ptx_dev->images = NULL;
+   pthread_mutex_init (&ptx_dev->image_lock, NULL);
+ 
+@@ -876,12 +917,42 @@ notify_var (const char *var_name, const
+     GOMP_PLUGIN_debug (0, "%s: '%s'\n", var_name, env_var);
+ }
+ 
++/* Parse GOMP_NVPTX_JIT: blank-separated "-O<0-4>" options; the last one seen
++   is stored in *GOMP_NVPTX_O.  Anything else is reported as a parse error.  */
++static void
++process_GOMP_NVPTX_JIT (intptr_t *gomp_nvptx_o)
++{
++  const char *var_name = "GOMP_NVPTX_JIT";
++  const char *env_var = secure_getenv (var_name);
++  notify_var (var_name, env_var);
++  if (env_var == NULL)
++    return;
++
++  for (const char *c = env_var; *c != '\0';)
++    {
++      while (*c == ' ')
++	c++;
++      if (*c == '\0')
++	break;	/* Only trailing blanks were left; that is not an error.  */
++      if (c[0] == '-' && c[1] == 'O'
++	  && '0' <= c[2] && c[2] <= '4'
++	  && (c[3] == '\0' || c[3] == ' '))
++	{
++	  *gomp_nvptx_o = c[2] - '0';
++	  c += 3;
++	  continue;
++	}
++      GOMP_PLUGIN_error ("Error parsing %s", var_name);
++      break;
++    }
++}
++
+ static bool
+ link_ptx (CUmodule *module, const struct targ_ptx_obj *ptx_objs,
+ 	  unsigned num_objs)
+ {
+-  CUjit_option opts[6];
+-  void *optvals[6];
++  CUjit_option opts[7];
++  void *optvals[7];
+   float elapsed = 0.0;
+   char elog[1024];
+   char ilog[16384];
+@@ -908,16 +979,41 @@ link_ptx (CUmodule *module, const struct
+   opts[5] = CU_JIT_LOG_VERBOSE;
+   optvals[5] = (void *) 1;
+ 
+-  CUDA_CALL (cuLinkCreate, 6, opts, optvals, &linkstate);
++  static intptr_t gomp_nvptx_o = -1;
++
++  static bool init_done = false;
++  if (!init_done)
++    {
++      process_GOMP_NVPTX_JIT (&gomp_nvptx_o);
++      init_done = true;
++  }
++
++  int nopts = 6;
++  if (gomp_nvptx_o != -1)
++    {
++      opts[nopts] = CU_JIT_OPTIMIZATION_LEVEL;
++      optvals[nopts] = (void *) gomp_nvptx_o;
++      nopts++;
++    }
++
++  if (CUDA_CALL_EXISTS (cuLinkCreate_v2))
++    CUDA_CALL (cuLinkCreate_v2, nopts, opts, optvals, &linkstate);
++  else
++    CUDA_CALL (cuLinkCreate, nopts, opts, optvals, &linkstate);
+ 
+   for (; num_objs--; ptx_objs++)
+     {
+       /* cuLinkAddData's 'data' argument erroneously omits the const
+ 	 qualifier.  */
+       GOMP_PLUGIN_debug (0, "Loading:\n---\n%s\n---\n", ptx_objs->code);
+-      r = CUDA_CALL_NOCHECK (cuLinkAddData, linkstate, CU_JIT_INPUT_PTX,
+-			     (char *) ptx_objs->code, ptx_objs->size,
+-			     0, 0, 0, 0);
++      if (CUDA_CALL_EXISTS (cuLinkAddData_v2))
++	r = CUDA_CALL_NOCHECK (cuLinkAddData_v2, linkstate, CU_JIT_INPUT_PTX,
++			       (char *) ptx_objs->code, ptx_objs->size,
++			       0, 0, 0, 0);
++      else
++	r = CUDA_CALL_NOCHECK (cuLinkAddData, linkstate, CU_JIT_INPUT_PTX,
++			       (char *) ptx_objs->code, ptx_objs->size,
++			       0, 0, 0, 0);
+       if (r != CUDA_SUCCESS)
+ 	{
+ 	  GOMP_PLUGIN_error ("Link error log %s\n", &elog[0]);
+@@ -1067,8 +1163,10 @@ nvptx_exec (void (*fn), size_t mapnum, v
+   int i;
+   struct ptx_stream *dev_str;
+   void *kargs[1];
+-  void *hp, *dp;
++  void *hp;
++  CUdeviceptr dp = 0;
+   struct nvptx_thread *nvthd = nvptx_thread ();
++  int warp_size = nvthd->ptx_dev->warp_size;
+   const char *maybe_abort_msg = "(perhaps abort was called)";
+ 
+   function = targ_fn->fn;
+@@ -1090,68 +1188,36 @@ nvptx_exec (void (*fn), size_t mapnum, v
+ 
+   if (seen_zero)
+     {
+-      /* See if the user provided GOMP_OPENACC_DIM environment
+-	 variable to specify runtime defaults. */
+-      static int default_dims[GOMP_DIM_MAX];
+-
+       pthread_mutex_lock (&ptx_dev_lock);
+-      if (!default_dims[0])
+-	{
+-	  const char *var_name = "GOMP_OPENACC_DIM";
+-	  /* We only read the environment variable once.  You can't
+-	     change it in the middle of execution.  The syntax  is
+-	     the same as for the -fopenacc-dim compilation option.  */
+-	  const char *env_var = getenv (var_name);
+-	  notify_var (var_name, env_var);
+-	  if (env_var)
+-	    {
+-	      const char *pos = env_var;
+ 
+-	      for (i = 0; *pos && i != GOMP_DIM_MAX; i++)
+-		{
+-		  if (i && *pos++ != ':')
+-		    break;
+-		  if (*pos != ':')
+-		    {
+-		      const char *eptr;
+-
+-		      errno = 0;
+-		      long val = strtol (pos, (char **)&eptr, 10);
+-		      if (errno || val < 0 || (unsigned)val != val)
+-			break;
+-		      default_dims[i] = (int)val;
+-		      pos = eptr;
+-		    }
+-		}
+-	    }
++      static int gomp_openacc_dims[GOMP_DIM_MAX];
++      if (!gomp_openacc_dims[0])
++	{
++	  /* See if the user provided GOMP_OPENACC_DIM environment
++	     variable to specify runtime defaults.  */
++	  for (int i = 0; i < GOMP_DIM_MAX; ++i)
++	    gomp_openacc_dims[i] = GOMP_PLUGIN_acc_default_dim (i);
++	}
+ 
+-	  int warp_size, block_size, dev_size, cpu_size;
+-	  CUdevice dev = nvptx_thread()->ptx_dev->dev;
+-	  /* 32 is the default for known hardware.  */
+-	  int gang = 0, worker = 32, vector = 32;
+-	  CUdevice_attribute cu_tpb, cu_ws, cu_mpc, cu_tpm;
+-
+-	  cu_tpb = CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK;
+-	  cu_ws = CU_DEVICE_ATTRIBUTE_WARP_SIZE;
+-	  cu_mpc = CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT;
+-	  cu_tpm  = CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_MULTIPROCESSOR;
+-
+-	  if (CUDA_CALL_NOCHECK (cuDeviceGetAttribute, &block_size, cu_tpb,
+-				 dev) == CUDA_SUCCESS
+-	      && CUDA_CALL_NOCHECK (cuDeviceGetAttribute, &warp_size, cu_ws,
+-				    dev) == CUDA_SUCCESS
+-	      && CUDA_CALL_NOCHECK (cuDeviceGetAttribute, &dev_size, cu_mpc,
+-				    dev) == CUDA_SUCCESS
+-	      && CUDA_CALL_NOCHECK (cuDeviceGetAttribute, &cpu_size, cu_tpm,
+-				    dev) == CUDA_SUCCESS)
+-	    {
+-	      GOMP_PLUGIN_debug (0, " warp_size=%d, block_size=%d,"
+-				 " dev_size=%d, cpu_size=%d\n",
+-				 warp_size, block_size, dev_size, cpu_size);
+-	      gang = (cpu_size / block_size) * dev_size;
+-	      worker = block_size / warp_size;
+-	      vector = warp_size;
+-	    }
++      if (!nvthd->ptx_dev->default_dims[0])
++	{
++	  int default_dims[GOMP_DIM_MAX];
++	  for (int i = 0; i < GOMP_DIM_MAX; ++i)
++	    default_dims[i] = gomp_openacc_dims[i];
++
++	  int gang, worker, vector;
++	  {
++	    int block_size = nvthd->ptx_dev->max_threads_per_block;
++	    int cpu_size = nvthd->ptx_dev->max_threads_per_multiprocessor;
++	    int dev_size = nvthd->ptx_dev->num_sms;
++	    GOMP_PLUGIN_debug (0, " warp_size=%d, block_size=%d,"
++			       " dev_size=%d, cpu_size=%d\n",
++			       warp_size, block_size, dev_size, cpu_size);
++
++	    gang = (cpu_size / block_size) * dev_size;
++	    worker = block_size / warp_size;
++	    vector = warp_size;
++	  }
+ 
+ 	  /* There is no upper bound on the gang size.  The best size
+ 	     matches the hardware configuration.  Logical gangs are
+@@ -1172,29 +1238,150 @@ nvptx_exec (void (*fn), size_t mapnum, v
+ 			     default_dims[GOMP_DIM_GANG],
+ 			     default_dims[GOMP_DIM_WORKER],
+ 			     default_dims[GOMP_DIM_VECTOR]);
++
++	  for (i = 0; i != GOMP_DIM_MAX; i++)
++	    nvthd->ptx_dev->default_dims[i] = default_dims[i];
+ 	}
+       pthread_mutex_unlock (&ptx_dev_lock);
+ 
+-      for (i = 0; i != GOMP_DIM_MAX; i++)
+-	if (!dims[i])
+-	  dims[i] = default_dims[i];
+-    }
+-
+-  /* This reserves a chunk of a pre-allocated page of memory mapped on both
+-     the host and the device. HP is a host pointer to the new chunk, and DP is
+-     the corresponding device pointer.  */
+-  map_push (dev_str, async, mapnum * sizeof (void *), &hp, &dp);
+-
+-  GOMP_PLUGIN_debug (0, "  %s: prepare mappings\n", __FUNCTION__);
+-
+-  /* Copy the array of arguments to the mapped page.  */
+-  for (i = 0; i < mapnum; i++)
+-    ((void **) hp)[i] = devaddrs[i];
+-
+-  /* Copy the (device) pointers to arguments to the device (dp and hp might in
+-     fact have the same value on a unified-memory system).  */
+-  CUDA_CALL_ASSERT (cuMemcpy, (CUdeviceptr) dp, (CUdeviceptr) hp,
+-		    mapnum * sizeof (void *));
++      {
++	bool default_dim_p[GOMP_DIM_MAX];
++	for (i = 0; i != GOMP_DIM_MAX; i++)
++	  default_dim_p[i] = !dims[i];
++
++	if (!CUDA_CALL_EXISTS (cuOccupancyMaxPotentialBlockSize))
++	  {
++	    for (i = 0; i != GOMP_DIM_MAX; i++)
++	      if (default_dim_p[i])
++		dims[i] = nvthd->ptx_dev->default_dims[i];
++
++	    if (default_dim_p[GOMP_DIM_VECTOR])
++	      dims[GOMP_DIM_VECTOR]
++		= MIN (dims[GOMP_DIM_VECTOR],
++		       (targ_fn->max_threads_per_block / warp_size
++			* warp_size));
++
++	    if (default_dim_p[GOMP_DIM_WORKER])
++	      dims[GOMP_DIM_WORKER]
++		= MIN (dims[GOMP_DIM_WORKER],
++		       targ_fn->max_threads_per_block / dims[GOMP_DIM_VECTOR]);
++	  }
++	else
++	  {
++	    /* Handle the case that the compiler allows the runtime to choose
++	       the vector-length conservatively, by ignoring
++	       gomp_openacc_dims[GOMP_DIM_VECTOR].  TODO: actually handle
++	       it.  */
++	    int vectors = 0;
++	    /* TODO: limit gomp_openacc_dims[GOMP_DIM_WORKER] such that
++	       gomp_openacc_dims[GOMP_DIM_WORKER] * actual_vectors does not
++	       exceed targ_fn->max_threads_per_block. */
++	    int workers = gomp_openacc_dims[GOMP_DIM_WORKER];
++	    int gangs = gomp_openacc_dims[GOMP_DIM_GANG];
++	    int grids, blocks;
++
++	    CUDA_CALL_ASSERT (cuOccupancyMaxPotentialBlockSize, &grids,
++			      &blocks, function, NULL, 0,
++			      dims[GOMP_DIM_WORKER] * dims[GOMP_DIM_VECTOR]);
++	    GOMP_PLUGIN_debug (0, "cuOccupancyMaxPotentialBlockSize: "
++			       "grid = %d, block = %d\n", grids, blocks);
++
++	    /* Keep the num_gangs proportional to the block size.  In
++	       the case where a block size is limited by shared-memory
++	       or the register file capacity, the runtime will not
++	       excessively over assign gangs to the multiprocessor
++	       units if their state is going to be swapped out even
++	       more than necessary. The constant factor 2 is there to
++	       prevent threads from idling when there is insufficient
++	       work for them.  */
++	    if (gangs == 0)
++	      gangs = 2 * grids * (blocks / warp_size);
++
++	    if (vectors == 0)
++	      vectors = warp_size;
++
++	    if (workers == 0)
++	      {
++		int actual_vectors = (default_dim_p[GOMP_DIM_VECTOR]
++				      ? vectors
++				      : dims[GOMP_DIM_VECTOR]);
++		workers = blocks / actual_vectors;
++		workers = MAX (workers, 1);
++		/* If we need a per-worker barrier ... .  */
++		if (actual_vectors > 32)
++		  /* Don't use more barriers than available.  */
++		  workers = MIN (workers, 15);
++	      }
++
++	    for (i = 0; i != GOMP_DIM_MAX; i++)
++	      if (default_dim_p[i])
++		switch (i)
++		  {
++		  case GOMP_DIM_GANG: dims[i] = gangs; break;
++		  case GOMP_DIM_WORKER: dims[i] = workers; break;
++		  case GOMP_DIM_VECTOR: dims[i] = vectors; break;
++		  default: GOMP_PLUGIN_fatal ("invalid dim");
++		  }
++	  }
++      }
++    }
++
++  /* Check if the accelerator has sufficient hardware resources to
++     launch the offloaded kernel.  */
++  if (dims[GOMP_DIM_WORKER] * dims[GOMP_DIM_VECTOR]
++      > targ_fn->max_threads_per_block)
++    {
++      const char *msg
++	= ("The Nvidia accelerator has insufficient resources to launch '%s'"
++	   " with num_workers = %d and vector_length = %d"
++	   "; "
++	   "recompile the program with 'num_workers = x and vector_length = y'"
++	   " on that offloaded region or '-fopenacc-dim=:x:y' where"
++	   " x * y <= %d"
++	   ".\n");
++      GOMP_PLUGIN_fatal (msg, targ_fn->launch->fn, dims[GOMP_DIM_WORKER],
++			 dims[GOMP_DIM_VECTOR], targ_fn->max_threads_per_block);
++    }
++
++  /* Check if the accelerator has sufficient barrier resources to
++     launch the offloaded kernel.  */
++  if (dims[GOMP_DIM_WORKER] > 15 && dims[GOMP_DIM_VECTOR] > 32)
++    {
++      const char *msg
++	= ("The Nvidia accelerator has insufficient barrier resources to launch"
++	   " '%s' with num_workers = %d and vector_length = %d"
++	   "; "
++	   "recompile the program with 'num_workers = x' on that offloaded"
++	   " region or '-fopenacc-dim=:x:' where x <= 15"
++	   "; "
++	   "or, recompile the program with 'vector_length = 32' on that"
++	   " offloaded region or '-fopenacc-dim=::32'"
++	   ".\n");
++	GOMP_PLUGIN_fatal (msg, targ_fn->launch->fn, dims[GOMP_DIM_WORKER],
++			   dims[GOMP_DIM_VECTOR]);
++    }
++
++  if (mapnum > 0)
++    {
++      /* This reserves a chunk of a pre-allocated page of memory mapped on both
++	 the host and the device. HP is a host pointer to the new chunk, and DP is
++	 the corresponding device pointer.  */
++      pthread_mutex_lock (&ptx_event_lock);
++      dp = map_push (dev_str, mapnum * sizeof (void *));
++      pthread_mutex_unlock (&ptx_event_lock);
++
++      GOMP_PLUGIN_debug (0, "  %s: prepare mappings\n", __FUNCTION__);
++
++      /* Copy the array of arguments to the mapped page.  */
++      hp = alloca(sizeof(void *) * mapnum);
++      for (i = 0; i < mapnum; i++)
++	((void **) hp)[i] = devaddrs[i];
++
++      /* Copy the (device) pointers to arguments to the device */
++      CUDA_CALL_ASSERT (cuMemcpyHtoD, dp, hp,
++			mapnum * sizeof (void *));
++    }
++
+   GOMP_PLUGIN_debug (0, "  %s: kernel %s: launch"
+ 		     " gangs=%u, workers=%u, vectors=%u\n",
+ 		     __FUNCTION__, targ_fn->launch->fn, dims[GOMP_DIM_GANG],
+@@ -1239,7 +1426,8 @@ nvptx_exec (void (*fn), size_t mapnum, v
+ 
+       CUDA_CALL_ASSERT (cuEventRecord, *e, dev_str->stream);
+ 
+-      event_add (PTX_EVT_KNL, e, (void *)dev_str, 0);
++      if (mapnum > 0)
++	event_add (PTX_EVT_KNL, e, (void *)dev_str, 0);
+     }
+ #else
+   r = CUDA_CALL_NOCHECK (cuCtxSynchronize, );
+@@ -1256,7 +1444,10 @@ nvptx_exec (void (*fn), size_t mapnum, v
+ #ifndef DISABLE_ASYNC
+   if (async < acc_async_noval)
+ #endif
+-    map_pop (dev_str);
++    {
++      if (mapnum > 0)
++	map_pop (dev_str);
++    }
+ }
+ 
+ void * openacc_get_current_cuda_context (void);
+@@ -1415,9 +1606,8 @@ nvptx_async_test (int async)
+   struct ptx_stream *s;
+ 
+   s = select_stream_for_async (async, pthread_self (), false, NULL);
+-
+   if (!s)
+-    GOMP_PLUGIN_fatal ("unknown async %d", async);
++    return 1;
+ 
+   r = CUDA_CALL_NOCHECK (cuStreamQuery, s->stream);
+   if (r == CUDA_SUCCESS)
+@@ -1472,7 +1662,7 @@ nvptx_wait (int async)
+ 
+   s = select_stream_for_async (async, pthread_self (), false, NULL);
+   if (!s)
+-    GOMP_PLUGIN_fatal ("unknown async %d", async);
++    return;
+ 
+   CUDA_CALL_ASSERT (cuStreamSynchronize, s->stream);
+ 
+@@ -1486,16 +1676,17 @@ nvptx_wait_async (int async1, int async2
+   struct ptx_stream *s1, *s2;
+   pthread_t self = pthread_self ();
+ 
++  s1 = select_stream_for_async (async1, self, false, NULL);
++  if (!s1)
++    return;
++
+   /* The stream that is waiting (rather than being waited for) doesn't
+      necessarily have to exist already.  */
+   s2 = select_stream_for_async (async2, self, true, NULL);
+ 
+-  s1 = select_stream_for_async (async1, self, false, NULL);
+-  if (!s1)
+-    GOMP_PLUGIN_fatal ("invalid async 1\n");
+-
++  /* A stream is always synchronized with itself.  */
+   if (s1 == s2)
+-    GOMP_PLUGIN_fatal ("identical parameters");
++    return;
+ 
+   e = (CUevent *) GOMP_PLUGIN_malloc (sizeof (CUevent));
+ 
+@@ -1629,8 +1820,14 @@ nvptx_set_cuda_stream (int async, void *
+   pthread_t self = pthread_self ();
+   struct nvptx_thread *nvthd = nvptx_thread ();
+ 
+-  if (async < 0)
+-    GOMP_PLUGIN_fatal ("bad async %d", async);
++  /* Due to the "null_stream" usage for "acc_async_sync", this cannot be used
++     to change the stream handle associated with "acc_async_sync".  */
++  if (async == acc_async_sync)
++    {
++      GOMP_PLUGIN_debug (0, "Refusing request to set CUDA stream associated"
++			 " with \"acc_async_sync\"\n");
++      return 0;
++    }
+ 
+   pthread_mutex_lock (&nvthd->ptx_dev->stream_lock);
+ 
+@@ -1739,6 +1936,12 @@ GOMP_OFFLOAD_fini_device (int n)
+       instantiated_devices--;
+     }
+ 
++  if (instantiated_devices == 0)
++    {
++      free (ptx_devices);
++      ptx_devices = NULL;
++    }
++
+   pthread_mutex_unlock (&ptx_dev_lock);
+   return true;
+ }
+--- libgomp/plugin/configfrag.ac.jj	2018-04-25 09:40:31.914655581 +0200
++++ libgomp/plugin/configfrag.ac	2019-05-07 18:46:36.533109624 +0200
+@@ -26,8 +26,6 @@
+ # see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+ # <http://www.gnu.org/licenses/>.
+ 
+-offload_targets=
+-AC_SUBST(offload_targets)
+ plugin_support=yes
+ AC_CHECK_LIB(dl, dlsym, , [plugin_support=no])
+ if test x"$plugin_support" = xyes; then
+@@ -59,7 +57,11 @@ AC_ARG_WITH(cuda-driver-lib,
+ 	[AS_HELP_STRING([--with-cuda-driver-lib=PATH],
+ 		[specify directory for the installed CUDA driver library])])
+ case "x$with_cuda_driver" in
+-  x | xno) ;;
++  x) ;;
++  xno)
++    CUDA_DRIVER_INCLUDE=no
++    CUDA_DRIVER_LIB=no
++    ;;
+   *) CUDA_DRIVER_INCLUDE=$with_cuda_driver/include
+      CUDA_DRIVER_LIB=$with_cuda_driver/lib
+      ;;
+@@ -70,10 +72,12 @@ fi
+ if test "x$with_cuda_driver_lib" != x; then
+   CUDA_DRIVER_LIB=$with_cuda_driver_lib
+ fi
+-if test "x$CUDA_DRIVER_INCLUDE" != x; then
++if test "x$CUDA_DRIVER_INCLUDE" != x \
++   && test "x$CUDA_DRIVER_INCLUDE" != xno; then
+   CUDA_DRIVER_CPPFLAGS=-I$CUDA_DRIVER_INCLUDE
+ fi
+-if test "x$CUDA_DRIVER_LIB" != x; then
++if test "x$CUDA_DRIVER_LIB" != x \
++   && test "x$CUDA_DRIVER_LIB" != xno; then
+   CUDA_DRIVER_LDFLAGS=-L$CUDA_DRIVER_LIB
+ fi
+ 
+@@ -133,7 +137,13 @@ AC_SUBST(PLUGIN_HSA_CPPFLAGS)
+ AC_SUBST(PLUGIN_HSA_LDFLAGS)
+ AC_SUBST(PLUGIN_HSA_LIBS)
+ 
+-# Get offload targets and path to install tree of offloading compiler.
++# Parse '--enable-offload-targets', figure out the corresponding libgomp
++# plugins, and configure to find the corresponding offload compilers.
++# 'offload_plugins' and 'offload_targets' will be populated in the same order.
++offload_plugins=
++offload_targets=
++AC_SUBST(offload_plugins)
++AC_SUBST(offload_targets)
+ offload_additional_options=
+ offload_additional_lib_paths=
+ AC_SUBST(offload_additional_options)
+@@ -152,10 +152,10 @@ if test x"$enable_offload_targets" != x;
+   for tgt in `echo $enable_offload_targets | sed -e 's#,# #g'`; do
+     tgt_dir=`echo $tgt | grep '=' | sed 's/.*=//'`
+     tgt=`echo $tgt | sed 's/=.*//'`
+-    tgt_name=
++    tgt_plugin=
+     case $tgt in
+       *-intelmic-* | *-intelmicemul-*)
+-	tgt_name=intelmic
++	tgt_plugin=intelmic
+ 	;;
+       nvptx*)
+ 	case "${target}" in
+@@ -167,30 +167,35 @@ if test x"$enable_offload_targets" != x;
+ 		PLUGIN_NVPTX=0
+ 		;;
+ 	      *)
+-		tgt_name=nvptx
++		tgt_plugin=nvptx
+ 		PLUGIN_NVPTX=$tgt
+-		PLUGIN_NVPTX_CPPFLAGS=$CUDA_DRIVER_CPPFLAGS
+-		PLUGIN_NVPTX_LDFLAGS=$CUDA_DRIVER_LDFLAGS
+-		PLUGIN_NVPTX_LIBS='-lcuda'
++		if test "x$CUDA_DRIVER_INCLUDE" != xno \
++		   && test "x$CUDA_DRIVER_LIB" != xno; then
++		  PLUGIN_NVPTX_CPPFLAGS=$CUDA_DRIVER_CPPFLAGS
++		  PLUGIN_NVPTX_LDFLAGS=$CUDA_DRIVER_LDFLAGS
++		  PLUGIN_NVPTX_LIBS='-lcuda'
+ 
+-		PLUGIN_NVPTX_save_CPPFLAGS=$CPPFLAGS
+-		CPPFLAGS="$PLUGIN_NVPTX_CPPFLAGS $CPPFLAGS"
+-		PLUGIN_NVPTX_save_LDFLAGS=$LDFLAGS
+-		LDFLAGS="$PLUGIN_NVPTX_LDFLAGS $LDFLAGS"
+-		PLUGIN_NVPTX_save_LIBS=$LIBS
+-		LIBS="$PLUGIN_NVPTX_LIBS $LIBS"
+-		AC_LINK_IFELSE(
+-		  [AC_LANG_PROGRAM(
+-		    [#include "cuda.h"],
+-		      [CUresult r = cuCtxPushCurrent (NULL);])],
+-		  [PLUGIN_NVPTX=1])
+-		CPPFLAGS=$PLUGIN_NVPTX_save_CPPFLAGS
+-		LDFLAGS=$PLUGIN_NVPTX_save_LDFLAGS
+-		LIBS=$PLUGIN_NVPTX_save_LIBS
++		  PLUGIN_NVPTX_save_CPPFLAGS=$CPPFLAGS
++		  CPPFLAGS="$PLUGIN_NVPTX_CPPFLAGS $CPPFLAGS"
++		  PLUGIN_NVPTX_save_LDFLAGS=$LDFLAGS
++		  LDFLAGS="$PLUGIN_NVPTX_LDFLAGS $LDFLAGS"
++		  PLUGIN_NVPTX_save_LIBS=$LIBS
++		  LIBS="$PLUGIN_NVPTX_LIBS $LIBS"
++		  AC_LINK_IFELSE(
++		    [AC_LANG_PROGRAM(
++		      [#include "cuda.h"],
++			[CUresult r = cuCtxPushCurrent (NULL);])],
++		    [PLUGIN_NVPTX=1])
++		  CPPFLAGS=$PLUGIN_NVPTX_save_CPPFLAGS
++		  LDFLAGS=$PLUGIN_NVPTX_save_LDFLAGS
++		  LIBS=$PLUGIN_NVPTX_save_LIBS
++		fi
+ 		case $PLUGIN_NVPTX in
+ 		  nvptx*)
+-		    if test "x$CUDA_DRIVER_INCLUDE" = x \
+-		       && test "x$CUDA_DRIVER_LIB" = x; then
++		    if (test "x$CUDA_DRIVER_INCLUDE" = x \
++			|| test "x$CUDA_DRIVER_INCLUDE" = xno) \
++		       && (test "x$CUDA_DRIVER_LIB" = x \
++			   || test "x$CUDA_DRIVER_LIB" = xno); then
+ 		      PLUGIN_NVPTX=1
+ 		      PLUGIN_NVPTX_CPPFLAGS='-I$(srcdir)/plugin/cuda'
+ 		      PLUGIN_NVPTX_LIBS='-ldl'
+@@ -191,7 +206,7 @@ if test x"$enable_offload_targets" != x;
+ 	        PLUGIN_HSA=0
+ 		;;
+ 	      *)
+-	        tgt_name=hsa
++		tgt_plugin=hsa
+ 	        PLUGIN_HSA=$tgt
+ 	        PLUGIN_HSA_CPPFLAGS=$HSA_RUNTIME_CPPFLAGS
+ 	        PLUGIN_HSA_LDFLAGS="$HSA_RUNTIME_LDFLAGS"
+@@ -209,7 +224,7 @@ if test x"$enable_offload_targets" != x;
+ 	        LDFLAGS=$PLUGIN_HSA_save_LDFLAGS
+ 	        LIBS=$PLUGIN_HSA_save_LIBS
+ 	        case $PLUGIN_HSA in
+-	          hsa*)
++		  hsa*)
+ 	            HSA_PLUGIN=0
+ 	            AC_MSG_ERROR([HSA run-time package required for HSA support])
+ 	            ;;
+@@ -226,16 +241,19 @@ if test x"$enable_offload_targets" != x;
+ 	AC_MSG_ERROR([unknown offload target specified])
+ 	;;
+     esac
+-    if test x"$tgt_name" = x; then
+-      # Don't configure libgomp for this offloading target if we don't build
+-      # the corresponding plugin.
++    if test x"$tgt_plugin" = x; then
++      # Not configuring libgomp for this offload target if we're not building
++      # the corresponding offload plugin.
+       continue
+-    elif test x"$offload_targets" = x; then
+-      offload_targets=$tgt_name
++    elif test x"$offload_plugins" = x; then
++      offload_plugins=$tgt_plugin
++      offload_targets=$tgt
+     else
+-      offload_targets=$offload_targets,$tgt_name
++      offload_plugins=$offload_plugins,$tgt_plugin
++      offload_targets=$offload_targets,$tgt
+     fi
+-    if test "$tgt_name" = hsa; then
++    # Configure additional search paths.
++    if test "$tgt_plugin" = hsa; then
+       # Offloading compilation is all handled by the target compiler.
+       :
+     elif test x"$tgt_dir" != x; then
+@@ -247,8 +265,8 @@ if test x"$enable_offload_targets" != x;
+     fi
+   done
+ fi
+-AC_DEFINE_UNQUOTED(OFFLOAD_TARGETS, "$offload_targets",
+-  [Define to offload targets, separated by commas.])
++AC_DEFINE_UNQUOTED(OFFLOAD_PLUGINS, "$offload_plugins",
++  [Define to offload plugins, separated by commas.])
+ AM_CONDITIONAL([PLUGIN_NVPTX], [test $PLUGIN_NVPTX = 1])
+ AC_DEFINE_UNQUOTED([PLUGIN_NVPTX], [$PLUGIN_NVPTX],
+   [Define to 1 if the NVIDIA plugin is built, 0 if not.])
+--- libgomp/affinity-fmt.c.jj	2019-05-07 18:46:36.285113585 +0200
++++ libgomp/affinity-fmt.c	2019-05-07 18:46:36.285113585 +0200
+@@ -0,0 +1,495 @@
++/* Copyright (C) 2018-2019 Free Software Foundation, Inc.
++   Contributed by Jakub Jelinek <jakub@redhat.com>.
++
++   This file is part of the GNU Offloading and Multi Processing Library
++   (libgomp).
++
++   Libgomp is free software; you can redistribute it and/or modify it
++   under the terms of the GNU General Public License as published by
++   the Free Software Foundation; either version 3, or (at your option)
++   any later version.
++
++   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
++   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
++   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
++   more details.
++
++   Under Section 7 of GPL version 3, you are granted additional
++   permissions described in the GCC Runtime Library Exception, version
++   3.1, as published by the Free Software Foundation.
++
++   You should have received a copy of the GNU General Public License and
++   a copy of the GCC Runtime Library Exception along with this program;
++   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
++   <http://www.gnu.org/licenses/>.  */
++
++#include "libgomp.h"
++#include <string.h>
++#include <stdio.h>
++#include <stdlib.h>
++#ifdef HAVE_UNISTD_H
++#include <unistd.h>
++#endif
++#ifdef HAVE_INTTYPES_H
++# include <inttypes.h>  /* For PRIx64.  */
++#endif
++#ifdef HAVE_UNAME
++#include <sys/utsname.h>
++#endif
++
++void
++gomp_print_string (const char *str, size_t len)
++{
++  fwrite (str, 1, len, stderr);
++}
++
++void
++gomp_set_affinity_format (const char *format, size_t len)
++{
++  if (len < gomp_affinity_format_len)
++    memcpy (gomp_affinity_format_var, format, len);
++  else
++    {
++      char *p;
++      if (gomp_affinity_format_len)
++	p = gomp_realloc (gomp_affinity_format_var, len + 1);
++      else
++	p = gomp_malloc (len + 1);
++      memcpy (p, format, len);
++      gomp_affinity_format_var = p;
++      gomp_affinity_format_len = len + 1;
++    }
++  gomp_affinity_format_var[len] = '\0';
++}
++
++void
++omp_set_affinity_format (const char *format)
++{
++  gomp_set_affinity_format (format, strlen (format));
++}
++
++size_t
++omp_get_affinity_format (char *buffer, size_t size)
++{
++  size_t len = strlen (gomp_affinity_format_var);
++  if (size)
++    {
++      if (len < size)
++	memcpy (buffer, gomp_affinity_format_var, len + 1);
++      else
++	{
++	  memcpy (buffer, gomp_affinity_format_var, size - 1);
++	  buffer[size - 1] = '\0';
++	}
++    }
++  return len;
++}
++
++void
++gomp_display_string (char *buffer, size_t size, size_t *ret,
++		     const char *str, size_t len)
++{
++  size_t r = *ret;
++  if (size && r < size)
++    {
++      size_t l = len;
++      if (size - r < len)
++	l = size - r;
++      memcpy (buffer + r, str, l);
++    }
++  *ret += len;
++  if (__builtin_expect (r > *ret, 0))
++    gomp_fatal ("overflow in omp_capture_affinity");
++}
++
++static void
++gomp_display_repeat (char *buffer, size_t size, size_t *ret,
++		     char c, size_t len)
++{
++  size_t r = *ret;
++  if (size && r < size)
++    {
++      size_t l = len;
++      if (size - r < len)
++	l = size - r;
++      memset (buffer + r, c, l);
++    }
++  *ret += len;
++  if (__builtin_expect (r > *ret, 0))
++    gomp_fatal ("overflow in omp_capture_affinity");
++}
++
++static void
++gomp_display_num (char *buffer, size_t size, size_t *ret,
++		  bool zero, bool right, size_t sz, char *buf)
++{
++  size_t l = strlen (buf);
++  if (sz == (size_t) -1 || l >= sz)
++    {
++      gomp_display_string (buffer, size, ret, buf, l);
++      return;
++    }
++  if (zero)
++    {
++      if (buf[0] == '-')
++	gomp_display_string (buffer, size, ret, buf, 1);
++      else if (buf[0] == '0' && buf[1] == 'x')
++	gomp_display_string (buffer, size, ret, buf, 2);
++      gomp_display_repeat (buffer, size, ret, '0', sz - l);
++      if (buf[0] == '-')
++	gomp_display_string (buffer, size, ret, buf + 1, l - 1);
++      else if (buf[0] == '0' && buf[1] == 'x')
++	gomp_display_string (buffer, size, ret, buf + 2, l - 2);
++      else
++	gomp_display_string (buffer, size, ret, buf, l);
++    }
++  else if (right)
++    {
++      gomp_display_repeat (buffer, size, ret, ' ', sz - l);
++      gomp_display_string (buffer, size, ret, buf, l);
++    }
++  else
++    {
++      gomp_display_string (buffer, size, ret, buf, l);
++      gomp_display_repeat (buffer, size, ret, ' ', sz - l);
++    }
++}
++
++static void
++gomp_display_int (char *buffer, size_t size, size_t *ret,
++		  bool zero, bool right, size_t sz, int num)
++{
++  char buf[3 * sizeof (int) + 2];
++  sprintf (buf, "%d", num);
++  gomp_display_num (buffer, size, ret, zero, right, sz, buf);
++}
++
++static void
++gomp_display_string_len (char *buffer, size_t size, size_t *ret,
++			 bool right, size_t sz, char *str, size_t len)
++{
++  if (sz == (size_t) -1 || len >= sz)
++    {
++      gomp_display_string (buffer, size, ret, str, len);
++      return;
++    }
++
++  if (right)
++    {
++      gomp_display_repeat (buffer, size, ret, ' ', sz - len);
++      gomp_display_string (buffer, size, ret, str, len);
++    }
++  else
++    {
++      gomp_display_string (buffer, size, ret, str, len);
++      gomp_display_repeat (buffer, size, ret, ' ', sz - len);
++    }
++}
++
++static void
++gomp_display_hostname (char *buffer, size_t size, size_t *ret,
++		       bool right, size_t sz)
++{
++#ifdef HAVE_GETHOSTNAME
++  {
++    char buf[256];
++    char *b = buf;
++    size_t len = 256;
++    do
++      {
++	b[len - 1] = '\0';
++	if (gethostname (b, len - 1) == 0)
++	  {
++	    size_t l = strlen (b);
++	    if (l < len - 1)
++	      {
++		gomp_display_string_len (buffer, size, ret,
++					 right, sz, b, l);
++		if (b != buf)
++		  free (b);
++		return;
++	      }
++	  }
++	if (len == 1048576)
++	  break;
++	len = len * 2;
++	if (len == 512)
++	  b = gomp_malloc (len);
++	else
++	  b = gomp_realloc (b, len);
++      }
++    while (1);
++    if (b != buf)
++      free (b);
++  }
++#endif
++#ifdef HAVE_UNAME
++  {
++    struct utsname buf;
++    if (uname (&buf) == 0)
++      {
++	gomp_display_string_len (buffer, size, ret, right, sz,
++				 buf.nodename, strlen (buf.nodename));
++	return;
++      }
++  }
++#endif
++  gomp_display_string_len (buffer, size, ret, right, sz, "node", 4);
++}
++
++struct affinity_types_struct {
++  char long_str[18];
++  char long_len;
++  char short_c; };
++
++static struct affinity_types_struct affinity_types[] =
++{
++#define AFFINITY_TYPE(l, s) \
++  { #l, sizeof (#l) - 1, s }
++  AFFINITY_TYPE (team_num, 't'),
++  AFFINITY_TYPE (num_teams, 'T'),
++  AFFINITY_TYPE (nesting_level, 'L'),
++  AFFINITY_TYPE (thread_num, 'n'),
++  AFFINITY_TYPE (num_threads, 'N'),
++  AFFINITY_TYPE (ancestor_tnum, 'a'),
++  AFFINITY_TYPE (host, 'H'),
++  AFFINITY_TYPE (process_id, 'P'),
++  AFFINITY_TYPE (native_thread_id, 'i'),
++  AFFINITY_TYPE (thread_affinity, 'A')
++#undef AFFINITY_TYPE
++};
++
++size_t
++gomp_display_affinity (char *buffer, size_t size,
++		       const char *format, gomp_thread_handle handle,
++		       struct gomp_team_state *ts, unsigned int place)
++{
++  size_t ret = 0;
++  do
++    {
++      const char *p = strchr (format, '%');
++      bool zero = false;
++      bool right = false;
++      size_t sz = -1;
++      char c;
++      int val;
++      if (p == NULL)
++	p = strchr (format, '\0');
++      if (p != format)
++	gomp_display_string (buffer, size, &ret,
++			     format, p - format);
++      if (*p == '\0')
++	break;
++      p++;
++      if (*p == '%')
++	{
++	  gomp_display_string (buffer, size, &ret, "%", 1);
++	  format = p + 1;
++	  continue;
++	}
++      if (*p == '0')
++	{
++	  zero = true;
++	  p++;
++	  if (*p != '.')
++	    gomp_fatal ("leading zero not followed by dot in affinity format");
++	}
++      if (*p == '.')
++	{
++	  right = true;
++	  p++;
++	}
++      if (*p >= '1' && *p <= '9')
++	{
++	  char *end;
++	  sz = strtoul (p, &end, 10);
++	  p = end;
++	}
++      else if (zero || right)
++	gomp_fatal ("leading zero or right justification in affinity format "
++		    "requires size");
++      c = *p;
++      if (c == '{')
++	{
++	  int i;
++	  for (i = 0;
++	       i < sizeof (affinity_types) / sizeof (affinity_types[0]); ++i)
++	    if (strncmp (p + 1, affinity_types[i].long_str,
++			 affinity_types[i].long_len) == 0
++		&& p[affinity_types[i].long_len + 1] == '}')
++	      {
++		c = affinity_types[i].short_c;
++		p += affinity_types[i].long_len + 1;
++		break;
++	      }
++	  if (c == '{')
++	    {
++	      char *q = strchr (p + 1, '}');
++	      if (q)
++		gomp_fatal ("unsupported long type name '%.*s' in affinity "
++			    "format", (int) (q - (p + 1)), p + 1);
++	      else
++		gomp_fatal ("unterminated long type name '%s' in affinity "
++			    "format", p + 1);
++	    }
++	}
++      switch (c)
++	{
++	case 't':
++	  val = omp_get_team_num ();
++	  goto do_int;
++	case 'T':
++	  val = omp_get_num_teams ();
++	  goto do_int;
++	case 'L':
++	  val = ts->level;
++	  goto do_int;
++	case 'n':
++	  val = ts->team_id;
++	  goto do_int;
++	case 'N':
++	  val = ts->team ? ts->team->nthreads : 1;
++	  goto do_int;
++	case 'a':
++	  val = ts->team ? ts->team->prev_ts.team_id : -1;
++	  goto do_int;
++	case 'H':
++	  gomp_display_hostname (buffer, size, &ret, right, sz);
++	  break;
++	case 'P':
++#ifdef HAVE_GETPID
++	  val = getpid ();
++#else
++	  val = 0;
++#endif
++	  goto do_int;
++	case 'i':
++#if defined(LIBGOMP_USE_PTHREADS) && defined(__GNUC__)
++	  {
++	    char buf[3 * (sizeof (handle) + sizeof (uintptr_t) + sizeof (int))
++		     + 4];
++	    /* This macro returns expr unmodified for integral or pointer
++	       types and 0 for anything else (e.g. aggregates).  */
++#define gomp_nonaggregate(expr) \
++  __builtin_choose_expr (__builtin_classify_type (expr) == 1		    \
++			 || __builtin_classify_type (expr) == 5, expr, 0)
++	    /* This macro returns expr unmodified for integral types,
++	       (uintptr_t) (expr) for pointer types and 0 for anything else
++	       (e.g. aggregates).  */
++#define gomp_integral(expr) \
++  __builtin_choose_expr (__builtin_classify_type (expr) == 5,		    \
++			 (uintptr_t) gomp_nonaggregate (expr),		    \
++			 gomp_nonaggregate (expr))
++
++	    if (sizeof (gomp_integral (handle)) == sizeof (unsigned long))
++	      sprintf (buf, "0x%lx", (unsigned long) gomp_integral (handle));
++#if defined (HAVE_INTTYPES_H) && defined (PRIx64)
++	    else if (sizeof (gomp_integral (handle)) == sizeof (uint64_t))
++	      sprintf (buf, "0x%" PRIx64, (uint64_t) gomp_integral (handle));
++#else
++	    else if (sizeof (gomp_integral (handle))
++		     == sizeof (unsigned long long))
++	      sprintf (buf, "0x%llx",
++		       (unsigned long long) gomp_integral (handle));
++#endif
++	    else
++	      sprintf (buf, "0x%x", (unsigned int) gomp_integral (handle));
++	    gomp_display_num (buffer, size, &ret, zero, right, sz, buf);
++	    break;
++	  }
++#else
++	  val = 0;
++	  goto do_int;
++#endif
++	case 'A':
++	  if (sz == (size_t) -1)
++	    gomp_display_affinity_place (buffer, size, &ret,
++					 place - 1);
++	  else if (right)
++	    {
++	      size_t len = 0;
++	      gomp_display_affinity_place (NULL, 0, &len, place - 1);
++	      if (len < sz)
++		gomp_display_repeat (buffer, size, &ret, ' ', sz - len);
++	      gomp_display_affinity_place (buffer, size, &ret, place - 1);
++	    }
++	  else
++	    {
++	      size_t start = ret;
++	      gomp_display_affinity_place (buffer, size, &ret, place - 1);
++	      if (ret - start < sz)
++		gomp_display_repeat (buffer, size, &ret, ' ', sz - (ret - start));
++	    }
++	  break;
++	do_int:
++	  gomp_display_int (buffer, size, &ret, zero, right, sz, val);
++	  break;
++	default:
++	  gomp_fatal ("unsupported type %c in affinity format", c);
++	}
++      format = p + 1;
++    }
++  while (1);
++  return ret;
++}
++
++size_t
++omp_capture_affinity (char *buffer, size_t size, const char *format)
++{
++  struct gomp_thread *thr = gomp_thread ();
++  size_t ret
++    = gomp_display_affinity (buffer, size,
++			     format && *format
++			     ? format : gomp_affinity_format_var,
++			     gomp_thread_self (), &thr->ts, thr->place);
++  if (size)
++    {
++      if (ret >= size)
++	buffer[size - 1] = '\0';
++      else
++	buffer[ret] = '\0';
++    }
++  return ret;
++}
++ialias (omp_capture_affinity)
++
++void
++omp_display_affinity (const char *format)
++{
++  char buf[512];
++  char *b;
++  size_t ret = ialias_call (omp_capture_affinity) (buf, sizeof buf, format);
++  if (ret < sizeof buf)
++    {
++      buf[ret] = '\n';
++      gomp_print_string (buf, ret + 1);
++      return;
++    }
++  b = gomp_malloc (ret + 1);
++  ialias_call (omp_capture_affinity) (b, ret + 1, format);
++  b[ret] = '\n';
++  gomp_print_string (b, ret + 1);
++  free (b);
++}
++
++void
++gomp_display_affinity_thread (gomp_thread_handle handle,
++			      struct gomp_team_state *ts, unsigned int place)
++{
++  char buf[512];
++  char *b;
++  size_t ret = gomp_display_affinity (buf, sizeof buf, gomp_affinity_format_var,
++				      handle, ts, place);
++  if (ret < sizeof buf)
++    {
++      buf[ret] = '\n';
++      gomp_print_string (buf, ret + 1);
++      return;
++    }
++  b = gomp_malloc (ret + 1);
++  gomp_display_affinity (b, ret + 1, gomp_affinity_format_var,
++  			 handle, ts, place);
++  b[ret] = '\n';
++  gomp_print_string (b, ret + 1);
++  free (b);
++}
+--- libgomp/single.c.jj	2018-04-25 09:40:31.870655561 +0200
++++ libgomp/single.c	2019-05-07 18:46:36.536109576 +0200
+@@ -47,7 +47,7 @@ GOMP_single_start (void)
+   return __sync_bool_compare_and_swap (&team->single_count, single_count,
+ 				       single_count + 1L);
+ #else
+-  bool ret = gomp_work_share_start (false);
++  bool ret = gomp_work_share_start (0);
+   if (ret)
+     gomp_work_share_init_done ();
+   gomp_work_share_end_nowait ();
+@@ -68,7 +68,7 @@ GOMP_single_copy_start (void)
+   bool first;
+   void *ret;
+ 
+-  first = gomp_work_share_start (false);
++  first = gomp_work_share_start (0);
+   
+   if (first)
+     {
+--- libgomp/oacc-cuda.c.jj	2018-04-25 09:40:31.321655307 +0200
++++ libgomp/oacc-cuda.c	2019-05-07 18:46:36.528109704 +0200
+@@ -58,7 +58,7 @@ acc_get_cuda_stream (int async)
+ {
+   struct goacc_thread *thr = goacc_thread ();
+ 
+-  if (async < 0)
++  if (!async_valid_p (async))
+     return NULL;
+ 
+   if (thr && thr->dev && thr->dev->openacc.cuda.get_stream_func)
+@@ -72,7 +72,7 @@ acc_set_cuda_stream (int async, void *st
+ {
+   struct goacc_thread *thr;
+ 
+-  if (async < 0 || stream == NULL)
++  if (!async_valid_p (async) || stream == NULL)
+     return 0;
+ 
+   goacc_lazy_initialize ();
+--- libgomp/work.c.jj	2018-04-25 09:40:31.925655587 +0200
++++ libgomp/work.c	2019-05-07 18:46:36.548109384 +0200
+@@ -76,7 +76,15 @@ alloc_work_share (struct gomp_team *team
+ #endif
+ 
+   team->work_share_chunk *= 2;
++  /* Allocating gomp_work_share structures aligned is just an
++     optimization, don't do it when using the fallback method.  */
++#ifdef GOMP_HAVE_EFFICIENT_ALIGNED_ALLOC
++  ws = gomp_aligned_alloc (__alignof (struct gomp_work_share),
++			   team->work_share_chunk
++			   * sizeof (struct gomp_work_share));
++#else
+   ws = gomp_malloc (team->work_share_chunk * sizeof (struct gomp_work_share));
++#endif
+   ws->next_alloc = team->work_shares[0].next_alloc;
+   team->work_shares[0].next_alloc = ws;
+   team->work_share_list_alloc = &ws[1];
+@@ -90,30 +98,35 @@ alloc_work_share (struct gomp_team *team
+    This shouldn't touch the next_alloc field.  */
+ 
+ void
+-gomp_init_work_share (struct gomp_work_share *ws, bool ordered,
++gomp_init_work_share (struct gomp_work_share *ws, size_t ordered,
+ 		      unsigned nthreads)
+ {
+   gomp_mutex_init (&ws->lock);
+   if (__builtin_expect (ordered, 0))
+     {
+-#define INLINE_ORDERED_TEAM_IDS_CNT \
+-  ((sizeof (struct gomp_work_share) \
+-    - offsetof (struct gomp_work_share, inline_ordered_team_ids)) \
+-   / sizeof (((struct gomp_work_share *) 0)->inline_ordered_team_ids[0]))
+-
+-      if (nthreads > INLINE_ORDERED_TEAM_IDS_CNT)
+-	ws->ordered_team_ids
+-	  = gomp_malloc (nthreads * sizeof (*ws->ordered_team_ids));
++#define INLINE_ORDERED_TEAM_IDS_SIZE \
++  (sizeof (struct gomp_work_share) \
++   - offsetof (struct gomp_work_share, inline_ordered_team_ids))
++
++      if (__builtin_expect (ordered != 1, 0))
++	{
++	  ordered += nthreads * sizeof (*ws->ordered_team_ids) - 1;
++	  ordered = ordered + __alignof__ (long long) - 1;
++	  ordered &= ~(__alignof__ (long long) - 1);
++	}
++      else
++	ordered = nthreads * sizeof (*ws->ordered_team_ids);
++      if (ordered > INLINE_ORDERED_TEAM_IDS_SIZE)
++	ws->ordered_team_ids = gomp_malloc (ordered);
+       else
+ 	ws->ordered_team_ids = ws->inline_ordered_team_ids;
+-      memset (ws->ordered_team_ids, '\0',
+-	      nthreads * sizeof (*ws->ordered_team_ids));
++      memset (ws->ordered_team_ids, '\0', ordered);
+       ws->ordered_num_used = 0;
+       ws->ordered_owner = -1;
+       ws->ordered_cur = 0;
+     }
+   else
+-    ws->ordered_team_ids = NULL;
++    ws->ordered_team_ids = ws->inline_ordered_team_ids;
+   gomp_ptrlock_init (&ws->next_ws, NULL);
+   ws->threads_completed = 0;
+ }
+@@ -166,7 +179,7 @@ free_work_share (struct gomp_team *team,
+    if this was the first thread to reach this point.  */
+ 
+ bool
+-gomp_work_share_start (bool ordered)
++gomp_work_share_start (size_t ordered)
+ {
+   struct gomp_thread *thr = gomp_thread ();
+   struct gomp_team *team = thr->ts.team;
+@@ -178,7 +191,7 @@ gomp_work_share_start (bool ordered)
+       ws = gomp_malloc (sizeof (*ws));
+       gomp_init_work_share (ws, ordered, 1);
+       thr->ts.work_share = ws;
+-      return ws;
++      return true;
+     }
+ 
+   ws = thr->ts.work_share;
+--- include/gomp-constants.h.jj	2018-04-25 09:40:39.757659209 +0200
++++ include/gomp-constants.h	2019-05-07 18:57:33.333627031 +0200
+@@ -189,6 +189,7 @@ enum gomp_map_kind
+ #define GOMP_TASK_FLAG_GRAINSIZE	(1 << 9)
+ #define GOMP_TASK_FLAG_IF		(1 << 10)
+ #define GOMP_TASK_FLAG_NOGROUP		(1 << 11)
++#define GOMP_TASK_FLAG_REDUCTION	(1 << 12)
+ 
+ /* GOMP_target{_ext,update_ext,enter_exit_data} flags argument.  */
+ #define GOMP_TARGET_FLAG_NOWAIT		(1 << 0)
+@@ -196,6 +197,18 @@ enum gomp_map_kind
+ /* Internal to libgomp.  */
+ #define GOMP_TARGET_FLAG_UPDATE		(1U << 31)
+ 
++
++/* OpenACC construct flags.  */
++
++/* Force host fallback execution.  */
++#define GOACC_FLAG_HOST_FALLBACK	(1 << 0)
++
++/* For legacy reasons, in the ABI, the GOACC_FLAGs are encoded as an inverted
++   bitmask.  */
++#define GOACC_FLAGS_MARSHAL_OP		BIT_NOT_EXPR
++#define GOACC_FLAGS_UNMARSHAL(X)	(~(X))
++
++
+ /* Versions of libgomp and device-specific plugins.  GOMP_VERSION
+    should be incremented whenever an ABI-incompatible change is introduced
+    to the plugin interface defined in libgomp/libgomp.h.  */
+@@ -251,6 +264,12 @@ enum gomp_map_kind
+    at most and shifted by this many bits.  */
+ #define GOMP_TARGET_ARG_VALUE_SHIFT		16
+ 
++/* Dependence types in omp_depend_t objects.  */
++#define GOMP_DEPEND_IN			1
++#define GOMP_DEPEND_OUT			2
++#define GOMP_DEPEND_INOUT		3
++#define GOMP_DEPEND_MUTEXINOUTSET	4
++
+ /* HSA specific data structures.  */
+ 
+ /* Identifiers of device-specific target arguments.  */
diff --git a/SOURCES/gcc8-pr95614-revert.patch b/SOURCES/gcc8-pr95614-revert.patch
deleted file mode 100644
index c3f9e26..0000000
--- a/SOURCES/gcc8-pr95614-revert.patch
+++ /dev/null
@@ -1,96 +0,0 @@
-2020-09-28  Jakub Jelinek  <jakub@redhat.com>
-
-	Revert:
-	2020-09-27  Steven G. Kargl  <kargl@gcc.gnu.org>
-		    Mark Eggleston  <markeggleston@gcc.gnu.org>
-
-	PR fortran/95614
-	* decl.c (gfc_get_common): Use gfc_match_common_name instead
-	of match_common_name.
-	* decl.c (gfc_bind_idents): Use gfc_match_common_name instead
-	of match_common_name.
-	* match.c : Rename match_common_name to gfc_match_common_name.
-	* match.c (gfc_match_common): Use gfc_match_common_name instead
-	of match_common_name.
-	* match.h : Rename match_common_name to gfc_match_common_name.
-	* resolve.c (resolve_common_vars): Check each symbol in a
-	common block has a global symbol.  If there is a global symbol
-	issue an error if the symbol type is known as is not a common
-	block name.
-
---- gcc/fortran/decl.c
-+++ gcc/fortran/decl.c
-@@ -5789,7 +5789,7 @@ get_bind_c_idents (void)
-       found_id = MATCH_YES;
-       gfc_get_ha_symbol (name, &tmp_sym);
-     }
--  else if (gfc_match_common_name (name) == MATCH_YES)
-+  else if (match_common_name (name) == MATCH_YES)
-     {
-       found_id = MATCH_YES;
-       com_block = gfc_get_common (name, 0);
-@@ -5834,7 +5834,7 @@ get_bind_c_idents (void)
- 	      found_id = MATCH_YES;
- 	      gfc_get_ha_symbol (name, &tmp_sym);
- 	    }
--	  else if (gfc_match_common_name (name) == MATCH_YES)
-+	  else if (match_common_name (name) == MATCH_YES)
- 	    {
- 	      found_id = MATCH_YES;
- 	      com_block = gfc_get_common (name, 0);
---- gcc/fortran/match.c
-+++ gcc/fortran/match.c
-@@ -5028,8 +5028,7 @@ gfc_get_common (const char *name, int from_module)
- 
- /* Match a common block name.  */
- 
--match
--gfc_match_common_name (char *name)
-+match match_common_name (char *name)
- {
-   match m;
- 
-@@ -5073,7 +5072,7 @@ gfc_match_common (void)
- 
-   for (;;)
-     {
--      m = gfc_match_common_name (name);
-+      m = match_common_name (name);
-       if (m == MATCH_ERROR)
- 	goto cleanup;
- 
---- gcc/fortran/match.h
-+++ gcc/fortran/match.h
-@@ -103,9 +103,11 @@ match gfc_match_call (void);
- 
- /* We want to use this function to check for a common-block-name
-    that can exist in a bind statement, so removed the "static"
--   declaration of the function in match.c. */
-+   declaration of the function in match.c.
-  
--match gfc_match_common_name (char *name);
-+   TODO: should probably rename this now that it'll be globally seen to
-+   gfc_match_common_name.  */
-+match match_common_name (char *name);
- 
- match gfc_match_common (void);
- match gfc_match_block_data (void);
---- gcc/fortran/resolve.c
-+++ gcc/fortran/resolve.c
-@@ -932,16 +932,9 @@ static void
- resolve_common_vars (gfc_common_head *common_block, bool named_common)
- {
-   gfc_symbol *csym = common_block->head;
--  gfc_gsymbol *gsym;
- 
-   for (; csym; csym = csym->common_next)
-     {
--      gsym = gfc_find_gsymbol (gfc_gsym_root, csym->name);
--      if (gsym && gsym->type != GSYM_UNKNOWN && gsym->type != GSYM_COMMON)
--	gfc_error_now ("Global entity %qs at %L cannot appear in a "
--			"COMMON block at %L", gsym->name,
--			&gsym->where, &csym->common_block->where);
--
-       /* gfc_add_in_common may have been called before, but the reported errors
- 	 have been ignored to continue parsing.
- 	 We do the checks again here.  */
diff --git a/SOURCES/gcc8-remove-old-demangle.patch b/SOURCES/gcc8-remove-old-demangle.patch
new file mode 100644
index 0000000..1abb8ed
--- /dev/null
+++ b/SOURCES/gcc8-remove-old-demangle.patch
@@ -0,0 +1,8227 @@
+commit 6c8120c5ff130e03d32ff15a8f0d0e703592a2af
+Author: Jason Merrill <jason@redhat.com>
+Date:   Sat Dec 22 19:06:34 2018 -0500
+
+    Remove support for demangling GCC 2.x era mangling schemes.
+    
+    libiberty/
+            * cplus-dem.c: Remove cplus_mangle_opname, cplus_demangle_opname,
+            internal_cplus_demangle, and all subroutines.
+            (libiberty_demanglers): Remove entries for ancient GNU (pre-3.0),
+            Lucid, ARM, HP, and EDG demangling styles.
+            (cplus_demangle): Remove 'work' variable.  Don't call
+            internal_cplus_demangle.
+    include/
+            * demangle.h: Remove support for ancient GNU (pre-3.0), Lucid,
+            ARM, HP, and EDG demangling styles.
+    
+    From-SVN: r267363
+
+--- include/demangle.h
++++ include/demangle.h
+@@ -53,20 +53,13 @@ extern "C" {
+ 					   */
+ 
+ #define DMGL_AUTO	 (1 << 8)
+-#define DMGL_GNU	 (1 << 9)
+-#define DMGL_LUCID	 (1 << 10)
+-#define DMGL_ARM	 (1 << 11)
+-#define DMGL_HP 	 (1 << 12)       /* For the HP aCC compiler;
+-                                            same as ARM except for
+-                                            template arguments, etc. */
+-#define DMGL_EDG	 (1 << 13)
+ #define DMGL_GNU_V3	 (1 << 14)
+ #define DMGL_GNAT	 (1 << 15)
+ #define DMGL_DLANG	 (1 << 16)
+ #define DMGL_RUST	 (1 << 17)	/* Rust wraps GNU_V3 style mangling.  */
+ 
+ /* If none of these are set, use 'current_demangling_style' as the default. */
+-#define DMGL_STYLE_MASK (DMGL_AUTO|DMGL_GNU|DMGL_LUCID|DMGL_ARM|DMGL_HP|DMGL_EDG|DMGL_GNU_V3|DMGL_JAVA|DMGL_GNAT|DMGL_DLANG|DMGL_RUST)
++#define DMGL_STYLE_MASK (DMGL_AUTO|DMGL_GNU_V3|DMGL_JAVA|DMGL_GNAT|DMGL_DLANG|DMGL_RUST)
+ 
+ /* Enumeration of possible demangling styles.
+ 
+@@ -92,11 +85,6 @@ extern enum demangling_styles
+   no_demangling = -1,
+   unknown_demangling = 0,
+   auto_demangling = DMGL_AUTO,
+-  gnu_demangling = DMGL_GNU,
+-  lucid_demangling = DMGL_LUCID,
+-  arm_demangling = DMGL_ARM,
+-  hp_demangling = DMGL_HP,
+-  edg_demangling = DMGL_EDG,
+   gnu_v3_demangling = DMGL_GNU_V3,
+   java_demangling = DMGL_JAVA,
+   gnat_demangling = DMGL_GNAT,
+@@ -108,11 +96,6 @@ extern enum demangling_styles
+ 
+ #define NO_DEMANGLING_STYLE_STRING            "none"
+ #define AUTO_DEMANGLING_STYLE_STRING	      "auto"
+-#define GNU_DEMANGLING_STYLE_STRING    	      "gnu"
+-#define LUCID_DEMANGLING_STYLE_STRING	      "lucid"
+-#define ARM_DEMANGLING_STYLE_STRING	      "arm"
+-#define HP_DEMANGLING_STYLE_STRING	      "hp"
+-#define EDG_DEMANGLING_STYLE_STRING	      "edg"
+ #define GNU_V3_DEMANGLING_STYLE_STRING        "gnu-v3"
+ #define JAVA_DEMANGLING_STYLE_STRING          "java"
+ #define GNAT_DEMANGLING_STYLE_STRING          "gnat"
+@@ -123,11 +106,6 @@ extern enum demangling_styles
+ 
+ #define CURRENT_DEMANGLING_STYLE current_demangling_style
+ #define AUTO_DEMANGLING (((int) CURRENT_DEMANGLING_STYLE) & DMGL_AUTO)
+-#define GNU_DEMANGLING (((int) CURRENT_DEMANGLING_STYLE) & DMGL_GNU)
+-#define LUCID_DEMANGLING (((int) CURRENT_DEMANGLING_STYLE) & DMGL_LUCID)
+-#define ARM_DEMANGLING (((int) CURRENT_DEMANGLING_STYLE) & DMGL_ARM)
+-#define HP_DEMANGLING (((int) CURRENT_DEMANGLING_STYLE) & DMGL_HP)
+-#define EDG_DEMANGLING (((int) CURRENT_DEMANGLING_STYLE) & DMGL_EDG)
+ #define GNU_V3_DEMANGLING (((int) CURRENT_DEMANGLING_STYLE) & DMGL_GNU_V3)
+ #define JAVA_DEMANGLING (((int) CURRENT_DEMANGLING_STYLE) & DMGL_JAVA)
+ #define GNAT_DEMANGLING (((int) CURRENT_DEMANGLING_STYLE) & DMGL_GNAT)
+@@ -147,17 +125,8 @@ extern const struct demangler_engine
+ extern char *
+ cplus_demangle (const char *mangled, int options);
+ 
+-extern int
+-cplus_demangle_opname (const char *opname, char *result, int options);
+-
+-extern const char *
+-cplus_mangle_opname (const char *opname, int options);
+-
+ /* Note: This sets global state.  FIXME if you care about multi-threading. */
+ 
+-extern void
+-set_cplus_marker_for_demangling (int ch);
+-
+ extern enum demangling_styles
+ cplus_demangle_set_style (enum demangling_styles style);
+ 
+--- libiberty/cplus-dem.c
++++ libiberty/cplus-dem.c
+@@ -29,12 +29,6 @@ License along with libiberty; see the file COPYING.LIB.  If
+ not, write to the Free Software Foundation, Inc., 51 Franklin Street - Fifth Floor,
+ Boston, MA 02110-1301, USA.  */
+ 
+-/* This file exports two functions; cplus_mangle_opname and cplus_demangle.
+-
+-   This file imports xmalloc and xrealloc, which are like malloc and
+-   realloc except that they generate a fatal error if there is no
+-   available memory.  */
+-
+ /* This file lives in both GCC and libiberty.  When making changes, please
+    try not to break either.  */
+ 
+@@ -44,9 +38,7 @@ Boston, MA 02110-1301, USA.  */
+ 
+ #include "safe-ctype.h"
+ 
+-#include <sys/types.h>
+ #include <string.h>
+-#include <stdio.h>
+ 
+ #ifdef HAVE_STDLIB_H
+ #include <stdlib.h>
+@@ -55,204 +47,14 @@ void * malloc ();
+ void * realloc ();
+ #endif
+ 
+-#ifdef HAVE_LIMITS_H
+-#include <limits.h>
+-#endif
+-#ifndef INT_MAX
+-# define INT_MAX       (int)(((unsigned int) ~0) >> 1)          /* 0x7FFFFFFF */ 
+-#endif
+-
+ #include <demangle.h>
+ #undef CURRENT_DEMANGLING_STYLE
+-#define CURRENT_DEMANGLING_STYLE work->options
++#define CURRENT_DEMANGLING_STYLE options
+ 
+ #include "libiberty.h"
+ 
+-#define min(X,Y) (((X) < (Y)) ? (X) : (Y))
+-
+-/* A value at least one greater than the maximum number of characters
+-   that will be output when using the `%d' format with `printf'.  */
+-#define INTBUF_SIZE 32
+-
+-extern void fancy_abort (void) ATTRIBUTE_NORETURN;
+-
+-/* In order to allow a single demangler executable to demangle strings
+-   using various common values of CPLUS_MARKER, as well as any specific
+-   one set at compile time, we maintain a string containing all the
+-   commonly used ones, and check to see if the marker we are looking for
+-   is in that string.  CPLUS_MARKER is usually '$' on systems where the
+-   assembler can deal with that.  Where the assembler can't, it's usually
+-   '.' (but on many systems '.' is used for other things).  We put the
+-   current defined CPLUS_MARKER first (which defaults to '$'), followed
+-   by the next most common value, followed by an explicit '$' in case
+-   the value of CPLUS_MARKER is not '$'.
+-
+-   We could avoid this if we could just get g++ to tell us what the actual
+-   cplus marker character is as part of the debug information, perhaps by
+-   ensuring that it is the character that terminates the gcc<n>_compiled
+-   marker symbol (FIXME).  */
+-
+-#if !defined (CPLUS_MARKER)
+-#define CPLUS_MARKER '$'
+-#endif
+-
+ enum demangling_styles current_demangling_style = auto_demangling;
+ 
+-static char cplus_markers[] = { CPLUS_MARKER, '.', '$', '\0' };
+-
+-static char char_str[2] = { '\000', '\000' };
+-
+-void
+-set_cplus_marker_for_demangling (int ch)
+-{
+-  cplus_markers[0] = ch;
+-}
+-
+-typedef struct string		/* Beware: these aren't required to be */
+-{				/*  '\0' terminated.  */
+-  char *b;			/* pointer to start of string */
+-  char *p;			/* pointer after last character */
+-  char *e;			/* pointer after end of allocated space */
+-} string;
+-
+-/* Stuff that is shared between sub-routines.
+-   Using a shared structure allows cplus_demangle to be reentrant.  */
+-
+-struct work_stuff
+-{
+-  int options;
+-  char **typevec;
+-  char **ktypevec;
+-  char **btypevec;
+-  int numk;
+-  int numb;
+-  int ksize;
+-  int bsize;
+-  int ntypes;
+-  int typevec_size;
+-  int constructor;
+-  int destructor;
+-  int static_type;	/* A static member function */
+-  int temp_start;       /* index in demangled to start of template args */
+-  int type_quals;       /* The type qualifiers.  */
+-  int dllimported;	/* Symbol imported from a PE DLL */
+-  char **tmpl_argvec;   /* Template function arguments. */
+-  int ntmpl_args;       /* The number of template function arguments. */
+-  int forgetting_types; /* Nonzero if we are not remembering the types
+-			   we see.  */
+-  string* previous_argument; /* The last function argument demangled.  */
+-  int nrepeats;         /* The number of times to repeat the previous
+-			   argument.  */
+-  int *proctypevec;     /* Indices of currently processed remembered typevecs.  */
+-  int proctypevec_size;
+-  int nproctypes;
+-};
+-
+-#define PRINT_ANSI_QUALIFIERS (work -> options & DMGL_ANSI)
+-#define PRINT_ARG_TYPES       (work -> options & DMGL_PARAMS)
+-
+-static const struct optable
+-{
+-  const char *const in;
+-  const char *const out;
+-  const int flags;
+-} optable[] = {
+-  {"nw",	  " new",	DMGL_ANSI},	/* new (1.92,	 ansi) */
+-  {"dl",	  " delete",	DMGL_ANSI},	/* new (1.92,	 ansi) */
+-  {"new",	  " new",	0},		/* old (1.91,	 and 1.x) */
+-  {"delete",	  " delete",	0},		/* old (1.91,	 and 1.x) */
+-  {"vn",	  " new []",	DMGL_ANSI},	/* GNU, pending ansi */
+-  {"vd",	  " delete []",	DMGL_ANSI},	/* GNU, pending ansi */
+-  {"as",	  "=",		DMGL_ANSI},	/* ansi */
+-  {"ne",	  "!=",		DMGL_ANSI},	/* old, ansi */
+-  {"eq",	  "==",		DMGL_ANSI},	/* old,	ansi */
+-  {"ge",	  ">=",		DMGL_ANSI},	/* old,	ansi */
+-  {"gt",	  ">",		DMGL_ANSI},	/* old,	ansi */
+-  {"le",	  "<=",		DMGL_ANSI},	/* old,	ansi */
+-  {"lt",	  "<",		DMGL_ANSI},	/* old,	ansi */
+-  {"plus",	  "+",		0},		/* old */
+-  {"pl",	  "+",		DMGL_ANSI},	/* ansi */
+-  {"apl",	  "+=",		DMGL_ANSI},	/* ansi */
+-  {"minus",	  "-",		0},		/* old */
+-  {"mi",	  "-",		DMGL_ANSI},	/* ansi */
+-  {"ami",	  "-=",		DMGL_ANSI},	/* ansi */
+-  {"mult",	  "*",		0},		/* old */
+-  {"ml",	  "*",		DMGL_ANSI},	/* ansi */
+-  {"amu",	  "*=",		DMGL_ANSI},	/* ansi (ARM/Lucid) */
+-  {"aml",	  "*=",		DMGL_ANSI},	/* ansi (GNU/g++) */
+-  {"convert",	  "+",		0},		/* old (unary +) */
+-  {"negate",	  "-",		0},		/* old (unary -) */
+-  {"trunc_mod",	  "%",		0},		/* old */
+-  {"md",	  "%",		DMGL_ANSI},	/* ansi */
+-  {"amd",	  "%=",		DMGL_ANSI},	/* ansi */
+-  {"trunc_div",	  "/",		0},		/* old */
+-  {"dv",	  "/",		DMGL_ANSI},	/* ansi */
+-  {"adv",	  "/=",		DMGL_ANSI},	/* ansi */
+-  {"truth_andif", "&&",		0},		/* old */
+-  {"aa",	  "&&",		DMGL_ANSI},	/* ansi */
+-  {"truth_orif",  "||",		0},		/* old */
+-  {"oo",	  "||",		DMGL_ANSI},	/* ansi */
+-  {"truth_not",	  "!",		0},		/* old */
+-  {"nt",	  "!",		DMGL_ANSI},	/* ansi */
+-  {"postincrement","++",	0},		/* old */
+-  {"pp",	  "++",		DMGL_ANSI},	/* ansi */
+-  {"postdecrement","--",	0},		/* old */
+-  {"mm",	  "--",		DMGL_ANSI},	/* ansi */
+-  {"bit_ior",	  "|",		0},		/* old */
+-  {"or",	  "|",		DMGL_ANSI},	/* ansi */
+-  {"aor",	  "|=",		DMGL_ANSI},	/* ansi */
+-  {"bit_xor",	  "^",		0},		/* old */
+-  {"er",	  "^",		DMGL_ANSI},	/* ansi */
+-  {"aer",	  "^=",		DMGL_ANSI},	/* ansi */
+-  {"bit_and",	  "&",		0},		/* old */
+-  {"ad",	  "&",		DMGL_ANSI},	/* ansi */
+-  {"aad",	  "&=",		DMGL_ANSI},	/* ansi */
+-  {"bit_not",	  "~",		0},		/* old */
+-  {"co",	  "~",		DMGL_ANSI},	/* ansi */
+-  {"call",	  "()",		0},		/* old */
+-  {"cl",	  "()",		DMGL_ANSI},	/* ansi */
+-  {"alshift",	  "<<",		0},		/* old */
+-  {"ls",	  "<<",		DMGL_ANSI},	/* ansi */
+-  {"als",	  "<<=",	DMGL_ANSI},	/* ansi */
+-  {"arshift",	  ">>",		0},		/* old */
+-  {"rs",	  ">>",		DMGL_ANSI},	/* ansi */
+-  {"ars",	  ">>=",	DMGL_ANSI},	/* ansi */
+-  {"component",	  "->",		0},		/* old */
+-  {"pt",	  "->",		DMGL_ANSI},	/* ansi; Lucid C++ form */
+-  {"rf",	  "->",		DMGL_ANSI},	/* ansi; ARM/GNU form */
+-  {"indirect",	  "*",		0},		/* old */
+-  {"method_call",  "->()",	0},		/* old */
+-  {"addr",	  "&",		0},		/* old (unary &) */
+-  {"array",	  "[]",		0},		/* old */
+-  {"vc",	  "[]",		DMGL_ANSI},	/* ansi */
+-  {"compound",	  ", ",		0},		/* old */
+-  {"cm",	  ", ",		DMGL_ANSI},	/* ansi */
+-  {"cond",	  "?:",		0},		/* old */
+-  {"cn",	  "?:",		DMGL_ANSI},	/* pseudo-ansi */
+-  {"max",	  ">?",		0},		/* old */
+-  {"mx",	  ">?",		DMGL_ANSI},	/* pseudo-ansi */
+-  {"min",	  "<?",		0},		/* old */
+-  {"mn",	  "<?",		DMGL_ANSI},	/* pseudo-ansi */
+-  {"nop",	  "",		0},		/* old (for operator=) */
+-  {"rm",	  "->*",	DMGL_ANSI},	/* ansi */
+-  {"sz",          "sizeof ",    DMGL_ANSI}      /* pseudo-ansi */
+-};
+-
+-/* These values are used to indicate the various type varieties.
+-   They are all non-zero so that they can be used as `success'
+-   values.  */
+-typedef enum type_kind_t
+-{
+-  tk_none,
+-  tk_pointer,
+-  tk_reference,
+-  tk_rvalue_reference,
+-  tk_integral,
+-  tk_bool,
+-  tk_char,
+-  tk_real
+-} type_kind_t;
+-
+ const struct demangler_engine libiberty_demanglers[] =
+ {
+   {
+@@ -268,39 +70,9 @@ const struct demangler_engine libiberty_
+   }
+   ,
+   {
+-    GNU_DEMANGLING_STYLE_STRING,
+-      gnu_demangling,
+-      "GNU (g++) style demangling"
+-  }
+-  ,
+-  {
+-    LUCID_DEMANGLING_STYLE_STRING,
+-      lucid_demangling,
+-      "Lucid (lcc) style demangling"
+-  }
+-  ,
+-  {
+-    ARM_DEMANGLING_STYLE_STRING,
+-      arm_demangling,
+-      "ARM style demangling"
+-  }
+-  ,
+-  {
+-    HP_DEMANGLING_STYLE_STRING,
+-      hp_demangling,
+-      "HP (aCC) style demangling"
+-  }
+-  ,
+-  {
+-    EDG_DEMANGLING_STYLE_STRING,
+-      edg_demangling,
+-      "EDG style demangling"
+-  }
+-  ,
+-  {
+     GNU_V3_DEMANGLING_STYLE_STRING,
+     gnu_v3_demangling,
+-    "GNU (g++) V3 ABI-style demangling"
++    "GNU (g++) V3 (Itanium C++ ABI) style demangling"
+   }
+   ,
+   {
+@@ -332,474 +104,6 @@ const struct demangler_engine libiberty_
+   }
+ };
+ 
+-#define STRING_EMPTY(str)	((str) -> b == (str) -> p)
+-#define APPEND_BLANK(str)	{if (!STRING_EMPTY(str)) \
+-    string_append(str, " ");}
+-#define LEN_STRING(str)         ( (STRING_EMPTY(str))?0:((str)->p - (str)->b))
+-
+-/* The scope separator appropriate for the language being demangled.  */
+-
+-#define SCOPE_STRING(work) ((work->options & DMGL_JAVA) ? "." : "::")
+-
+-#define ARM_VTABLE_STRING "__vtbl__"	/* Lucid/ARM virtual table prefix */
+-#define ARM_VTABLE_STRLEN 8		/* strlen (ARM_VTABLE_STRING) */
+-
+-/* Prototypes for local functions */
+-
+-static void delete_work_stuff (struct work_stuff *);
+-
+-static void delete_non_B_K_work_stuff (struct work_stuff *);
+-
+-static char *mop_up (struct work_stuff *, string *, int);
+-
+-static void squangle_mop_up (struct work_stuff *);
+-
+-static void work_stuff_copy_to_from (struct work_stuff *, struct work_stuff *);
+-
+-#if 0
+-static int
+-demangle_method_args (struct work_stuff *, const char **, string *);
+-#endif
+-
+-static char *
+-internal_cplus_demangle (struct work_stuff *, const char *);
+-
+-static int
+-demangle_template_template_parm (struct work_stuff *work,
+-                                 const char **, string *);
+-
+-static int
+-demangle_template (struct work_stuff *work, const char **, string *,
+-                   string *, int, int);
+-
+-static int
+-arm_pt (struct work_stuff *, const char *, int, const char **,
+-        const char **);
+-
+-static int
+-demangle_class_name (struct work_stuff *, const char **, string *);
+-
+-static int
+-demangle_qualified (struct work_stuff *, const char **, string *,
+-                    int, int);
+-
+-static int demangle_class (struct work_stuff *, const char **, string *);
+-
+-static int demangle_fund_type (struct work_stuff *, const char **, string *);
+-
+-static int demangle_signature (struct work_stuff *, const char **, string *);
+-
+-static int demangle_prefix (struct work_stuff *, const char **, string *);
+-
+-static int gnu_special (struct work_stuff *, const char **, string *);
+-
+-static int arm_special (const char **, string *);
+-
+-static void string_need (string *, int);
+-
+-static void string_delete (string *);
+-
+-static void
+-string_init (string *);
+-
+-static void string_clear (string *);
+-
+-#if 0
+-static int string_empty (string *);
+-#endif
+-
+-static void string_append (string *, const char *);
+-
+-static void string_appends (string *, string *);
+-
+-static void string_appendn (string *, const char *, int);
+-
+-static void string_prepend (string *, const char *);
+-
+-static void string_prependn (string *, const char *, int);
+-
+-static void string_append_template_idx (string *, int);
+-
+-static int get_count (const char **, int *);
+-
+-static int consume_count (const char **);
+-
+-static int consume_count_with_underscores (const char**);
+-
+-static int demangle_args (struct work_stuff *, const char **, string *);
+-
+-static int demangle_nested_args (struct work_stuff*, const char**, string*);
+-
+-static int do_type (struct work_stuff *, const char **, string *);
+-
+-static int do_arg (struct work_stuff *, const char **, string *);
+-
+-static int
+-demangle_function_name (struct work_stuff *, const char **, string *,
+-                        const char *);
+-
+-static int
+-iterate_demangle_function (struct work_stuff *,
+-                           const char **, string *, const char *);
+-
+-static void remember_type (struct work_stuff *, const char *, int);
+-
+-static void push_processed_type (struct work_stuff *, int);
+-
+-static void pop_processed_type (struct work_stuff *);
+-
+-static void remember_Btype (struct work_stuff *, const char *, int, int);
+-
+-static int register_Btype (struct work_stuff *);
+-
+-static void remember_Ktype (struct work_stuff *, const char *, int);
+-
+-static void forget_types (struct work_stuff *);
+-
+-static void forget_B_and_K_types (struct work_stuff *);
+-
+-static void string_prepends (string *, string *);
+-
+-static int
+-demangle_template_value_parm (struct work_stuff*, const char**,
+-                              string*, type_kind_t);
+-
+-static int
+-do_hpacc_template_const_value (struct work_stuff *, const char **, string *);
+-
+-static int
+-do_hpacc_template_literal (struct work_stuff *, const char **, string *);
+-
+-static int snarf_numeric_literal (const char **, string *);
+-
+-/* There is a TYPE_QUAL value for each type qualifier.  They can be
+-   combined by bitwise-or to form the complete set of qualifiers for a
+-   type.  */
+-
+-#define TYPE_UNQUALIFIED   0x0
+-#define TYPE_QUAL_CONST    0x1
+-#define TYPE_QUAL_VOLATILE 0x2
+-#define TYPE_QUAL_RESTRICT 0x4
+-
+-static int code_for_qualifier (int);
+-
+-static const char* qualifier_string (int);
+-
+-static const char* demangle_qualifier (int);
+-
+-static int demangle_expression (struct work_stuff *, const char **, string *, 
+-                                type_kind_t);
+-
+-static int
+-demangle_integral_value (struct work_stuff *, const char **, string *);
+-
+-static int
+-demangle_real_value (struct work_stuff *, const char **, string *);
+-
+-static void
+-demangle_arm_hp_template (struct work_stuff *, const char **, int, string *);
+-
+-static void
+-recursively_demangle (struct work_stuff *, const char **, string *, int);
+-
+-/* Translate count to integer, consuming tokens in the process.
+-   Conversion terminates on the first non-digit character.
+-
+-   Trying to consume something that isn't a count results in no
+-   consumption of input and a return of -1.
+-
+-   Overflow consumes the rest of the digits, and returns -1.  */
+-
+-static int
+-consume_count (const char **type)
+-{
+-  int count = 0;
+-
+-  if (! ISDIGIT ((unsigned char)**type))
+-    return -1;
+-
+-  while (ISDIGIT ((unsigned char)**type))
+-    {
+-      const int digit = **type - '0';
+-      /* Check for overflow.  */
+-      if (count > ((INT_MAX - digit) / 10))
+-	{
+-	  while (ISDIGIT ((unsigned char) **type))
+-	    (*type)++;
+-	  return -1;
+-	}
+-
+-      count *= 10;
+-      count += digit;
+-      (*type)++;
+-    }
+-
+-  if (count < 0)
+-    count = -1;
+-
+-  return (count);
+-}
+-
+-
+-/* Like consume_count, but for counts that are preceded and followed
+-   by '_' if they are greater than 10.  Also, -1 is returned for
+-   failure, since 0 can be a valid value.  */
+-
+-static int
+-consume_count_with_underscores (const char **mangled)
+-{
+-  int idx;
+-
+-  if (**mangled == '_')
+-    {
+-      (*mangled)++;
+-      if (!ISDIGIT ((unsigned char)**mangled))
+-	return -1;
+-
+-      idx = consume_count (mangled);
+-      if (**mangled != '_')
+-	/* The trailing underscore was missing. */
+-	return -1;
+-
+-      (*mangled)++;
+-    }
+-  else
+-    {
+-      if (**mangled < '0' || **mangled > '9')
+-	return -1;
+-
+-      idx = **mangled - '0';
+-      (*mangled)++;
+-    }
+-
+-  return idx;
+-}
+-
+-/* C is the code for a type-qualifier.  Return the TYPE_QUAL
+-   corresponding to this qualifier.  */
+-
+-static int
+-code_for_qualifier (int c)
+-{
+-  switch (c)
+-    {
+-    case 'C':
+-      return TYPE_QUAL_CONST;
+-
+-    case 'V':
+-      return TYPE_QUAL_VOLATILE;
+-
+-    case 'u':
+-      return TYPE_QUAL_RESTRICT;
+-
+-    default:
+-      break;
+-    }
+-
+-  /* C was an invalid qualifier.  */
+-  abort ();
+-}
+-
+-/* Return the string corresponding to the qualifiers given by
+-   TYPE_QUALS.  */
+-
+-static const char*
+-qualifier_string (int type_quals)
+-{
+-  switch (type_quals)
+-    {
+-    case TYPE_UNQUALIFIED:
+-      return "";
+-
+-    case TYPE_QUAL_CONST:
+-      return "const";
+-
+-    case TYPE_QUAL_VOLATILE:
+-      return "volatile";
+-
+-    case TYPE_QUAL_RESTRICT:
+-      return "__restrict";
+-
+-    case TYPE_QUAL_CONST | TYPE_QUAL_VOLATILE:
+-      return "const volatile";
+-
+-    case TYPE_QUAL_CONST | TYPE_QUAL_RESTRICT:
+-      return "const __restrict";
+-
+-    case TYPE_QUAL_VOLATILE | TYPE_QUAL_RESTRICT:
+-      return "volatile __restrict";
+-
+-    case TYPE_QUAL_CONST | TYPE_QUAL_VOLATILE | TYPE_QUAL_RESTRICT:
+-      return "const volatile __restrict";
+-
+-    default:
+-      break;
+-    }
+-
+-  /* TYPE_QUALS was an invalid qualifier set.  */
+-  abort ();
+-}
+-
+-/* C is the code for a type-qualifier.  Return the string
+-   corresponding to this qualifier.  This function should only be
+-   called with a valid qualifier code.  */
+-
+-static const char*
+-demangle_qualifier (int c)
+-{
+-  return qualifier_string (code_for_qualifier (c));
+-}
+-
+-int
+-cplus_demangle_opname (const char *opname, char *result, int options)
+-{
+-  int len, len1, ret;
+-  string type;
+-  struct work_stuff work[1];
+-  const char *tem;
+-
+-  len = strlen(opname);
+-  result[0] = '\0';
+-  ret = 0;
+-  memset ((char *) work, 0, sizeof (work));
+-  work->options = options;
+-
+-  if (opname[0] == '_' && opname[1] == '_'
+-      && opname[2] == 'o' && opname[3] == 'p')
+-    {
+-      /* ANSI.  */
+-      /* type conversion operator.  */
+-      tem = opname + 4;
+-      if (do_type (work, &tem, &type))
+-	{
+-	  strcat (result, "operator ");
+-	  strncat (result, type.b, type.p - type.b);
+-	  string_delete (&type);
+-	  ret = 1;
+-	}
+-    }
+-  else if (opname[0] == '_' && opname[1] == '_'
+-	   && ISLOWER((unsigned char)opname[2])
+-	   && ISLOWER((unsigned char)opname[3]))
+-    {
+-      if (opname[4] == '\0')
+-	{
+-	  /* Operator.  */
+-	  size_t i;
+-	  for (i = 0; i < ARRAY_SIZE (optable); i++)
+-	    {
+-	      if (strlen (optable[i].in) == 2
+-		  && memcmp (optable[i].in, opname + 2, 2) == 0)
+-		{
+-		  strcat (result, "operator");
+-		  strcat (result, optable[i].out);
+-		  ret = 1;
+-		  break;
+-		}
+-	    }
+-	}
+-      else
+-	{
+-	  if (opname[2] == 'a' && opname[5] == '\0')
+-	    {
+-	      /* Assignment.  */
+-	      size_t i;
+-	      for (i = 0; i < ARRAY_SIZE (optable); i++)
+-		{
+-		  if (strlen (optable[i].in) == 3
+-		      && memcmp (optable[i].in, opname + 2, 3) == 0)
+-		    {
+-		      strcat (result, "operator");
+-		      strcat (result, optable[i].out);
+-		      ret = 1;
+-		      break;
+-		    }
+-		}
+-	    }
+-	}
+-    }
+-  else if (len >= 3
+-	   && opname[0] == 'o'
+-	   && opname[1] == 'p'
+-	   && strchr (cplus_markers, opname[2]) != NULL)
+-    {
+-      /* see if it's an assignment expression */
+-      if (len >= 10 /* op$assign_ */
+-	  && memcmp (opname + 3, "assign_", 7) == 0)
+-	{
+-	  size_t i;
+-	  for (i = 0; i < ARRAY_SIZE (optable); i++)
+-	    {
+-	      len1 = len - 10;
+-	      if ((int) strlen (optable[i].in) == len1
+-		  && memcmp (optable[i].in, opname + 10, len1) == 0)
+-		{
+-		  strcat (result, "operator");
+-		  strcat (result, optable[i].out);
+-		  strcat (result, "=");
+-		  ret = 1;
+-		  break;
+-		}
+-	    }
+-	}
+-      else
+-	{
+-	  size_t i;
+-	  for (i = 0; i < ARRAY_SIZE (optable); i++)
+-	    {
+-	      len1 = len - 3;
+-	      if ((int) strlen (optable[i].in) == len1
+-		  && memcmp (optable[i].in, opname + 3, len1) == 0)
+-		{
+-		  strcat (result, "operator");
+-		  strcat (result, optable[i].out);
+-		  ret = 1;
+-		  break;
+-		}
+-	    }
+-	}
+-    }
+-  else if (len >= 5 && memcmp (opname, "type", 4) == 0
+-	   && strchr (cplus_markers, opname[4]) != NULL)
+-    {
+-      /* type conversion operator */
+-      tem = opname + 5;
+-      if (do_type (work, &tem, &type))
+-	{
+-	  strcat (result, "operator ");
+-	  strncat (result, type.b, type.p - type.b);
+-	  string_delete (&type);
+-	  ret = 1;
+-	}
+-    }
+-  squangle_mop_up (work);
+-  return ret;
+-
+-}
+-
+-/* Takes operator name as e.g. "++" and returns mangled
+-   operator name (e.g. "postincrement_expr"), or NULL if not found.
+-
+-   If OPTIONS & DMGL_ANSI == 1, return the ANSI name;
+-   if OPTIONS & DMGL_ANSI == 0, return the old GNU name.  */
+-
+-const char *
+-cplus_mangle_opname (const char *opname, int options)
+-{
+-  size_t i;
+-  int len;
+-
+-  len = strlen (opname);
+-  for (i = 0; i < ARRAY_SIZE (optable); i++)
+-    {
+-      if ((int) strlen (optable[i].out) == len
+-	  && (options & DMGL_ANSI) == (optable[i].flags & DMGL_ANSI)
+-	  && memcmp (optable[i].out, opname, len) == 0)
+-	return optable[i].in;
+-    }
+-  return (0);
+-}
+-
+ /* Add a routine to set the demangling style to be sure it is valid and
+    allow for any demangler initialization that maybe necessary. */
+ 
+@@ -840,22 +144,6 @@ cplus_demangle_name_to_style (const char
+    It is the caller's responsibility to free the string which
+    is returned.
+ 
+-   The OPTIONS arg may contain one or more of the following bits:
+-
+-   	DMGL_ANSI	ANSI qualifiers such as `const' and `void' are
+-			included.
+-	DMGL_PARAMS	Function parameters are included.
+-
+-   For example,
+-
+-   cplus_demangle ("foo__1Ai", DMGL_PARAMS)		=> "A::foo(int)"
+-   cplus_demangle ("foo__1Ai", DMGL_PARAMS | DMGL_ANSI)	=> "A::foo(int)"
+-   cplus_demangle ("foo__1Ai", 0)			=> "A::foo"
+-
+-   cplus_demangle ("foo__1Afe", DMGL_PARAMS)		=> "A::foo(float,...)"
+-   cplus_demangle ("foo__1Afe", DMGL_PARAMS | DMGL_ANSI)=> "A::foo(float,...)"
+-   cplus_demangle ("foo__1Afe", 0)			=> "A::foo"
+-
+    Note that any leading underscores, or other such characters prepended by
+    the compilation system, are presumed to have already been stripped from
+    MANGLED.  */
+@@ -864,20 +152,17 @@ char *
+ cplus_demangle (const char *mangled, int options)
+ {
+   char *ret;
+-  struct work_stuff work[1];
+ 
+   if (current_demangling_style == no_demangling)
+     return xstrdup (mangled);
+ 
+-  memset ((char *) work, 0, sizeof (work));
+-  work->options = options;
+-  if ((work->options & DMGL_STYLE_MASK) == 0)
+-    work->options |= (int) current_demangling_style & DMGL_STYLE_MASK;
++  if ((options & DMGL_STYLE_MASK) == 0)
++    options |= (int) current_demangling_style & DMGL_STYLE_MASK;
+ 
+   /* The V3 ABI demangling is implemented elsewhere.  */
+   if (GNU_V3_DEMANGLING || RUST_DEMANGLING || AUTO_DEMANGLING)
+     {
+-      ret = cplus_demangle_v3 (mangled, work->options);
++      ret = cplus_demangle_v3 (mangled, options);
+       if (GNU_V3_DEMANGLING)
+ 	return ret;
+ 
+@@ -915,8 +200,6 @@ cplus_demangle (const char *mangled, int
+ 	return ret;
+     }
+ 
+-  ret = internal_cplus_demangle (work, mangled);
+-  squangle_mop_up (work);
+   return (ret);
+ }
+ 
+@@ -1205,3828 +488,3 @@ ada_demangle (const char *mangled, int o
+ 
+   return demangled;
+ }
+-
+-/* This function performs most of what cplus_demangle use to do, but
+-   to be able to demangle a name with a B, K or n code, we need to
+-   have a longer term memory of what types have been seen. The original
+-   now initializes and cleans up the squangle code info, while internal
+-   calls go directly to this routine to avoid resetting that info. */
+-
+-static char *
+-internal_cplus_demangle (struct work_stuff *work, const char *mangled)
+-{
+-
+-  string decl;
+-  int success = 0;
+-  char *demangled = NULL;
+-  int s1, s2, s3, s4;
+-  s1 = work->constructor;
+-  s2 = work->destructor;
+-  s3 = work->static_type;
+-  s4 = work->type_quals;
+-  work->constructor = work->destructor = 0;
+-  work->type_quals = TYPE_UNQUALIFIED;
+-  work->dllimported = 0;
+-
+-  if ((mangled != NULL) && (*mangled != '\0'))
+-    {
+-      string_init (&decl);
+-
+-      /* First check to see if gnu style demangling is active and if the
+-	 string to be demangled contains a CPLUS_MARKER.  If so, attempt to
+-	 recognize one of the gnu special forms rather than looking for a
+-	 standard prefix.  In particular, don't worry about whether there
+-	 is a "__" string in the mangled string.  Consider "_$_5__foo" for
+-	 example.  */
+-
+-      if ((AUTO_DEMANGLING || GNU_DEMANGLING))
+-	{
+-	  success = gnu_special (work, &mangled, &decl);
+-	  if (!success)
+-	    {
+-	      delete_work_stuff (work);
+-	      string_delete (&decl);
+-	    }
+-	}
+-      if (!success)
+-	{
+-	  success = demangle_prefix (work, &mangled, &decl);
+-	}
+-      if (success && (*mangled != '\0'))
+-	{
+-	  success = demangle_signature (work, &mangled, &decl);
+-	}
+-      if (work->constructor == 2)
+-        {
+-          string_prepend (&decl, "global constructors keyed to ");
+-          work->constructor = 0;
+-        }
+-      else if (work->destructor == 2)
+-        {
+-          string_prepend (&decl, "global destructors keyed to ");
+-          work->destructor = 0;
+-        }
+-      else if (work->dllimported == 1)
+-        {
+-          string_prepend (&decl, "import stub for ");
+-          work->dllimported = 0;
+-        }
+-      demangled = mop_up (work, &decl, success);
+-    }
+-  work->constructor = s1;
+-  work->destructor = s2;
+-  work->static_type = s3;
+-  work->type_quals = s4;
+-  return demangled;
+-}
+-
+-
+-/* Clear out and squangling related storage */
+-static void
+-squangle_mop_up (struct work_stuff *work)
+-{
+-  /* clean up the B and K type mangling types. */
+-  forget_B_and_K_types (work);
+-  if (work -> btypevec != NULL)
+-    {
+-      free ((char *) work -> btypevec);
+-      work->btypevec = NULL;
+-      work->bsize = 0;
+-    }
+-  if (work -> ktypevec != NULL)
+-    {
+-      free ((char *) work -> ktypevec);
+-      work->ktypevec = NULL;
+-      work->ksize = 0;
+-    }
+-}
+-
+-
+-/* Copy the work state and storage.  */
+-
+-static void
+-work_stuff_copy_to_from (struct work_stuff *to, struct work_stuff *from)
+-{
+-  int i;
+-
+-  delete_work_stuff (to);
+-
+-  /* Shallow-copy scalars.  */
+-  memcpy (to, from, sizeof (*to));
+-
+-  /* Deep-copy dynamic storage.  */
+-  if (from->typevec_size)
+-    to->typevec = XNEWVEC (char *, from->typevec_size);
+-
+-  for (i = 0; i < from->ntypes; i++)
+-    {
+-      int len = strlen (from->typevec[i]) + 1;
+-
+-      to->typevec[i] = XNEWVEC (char, len);
+-      memcpy (to->typevec[i], from->typevec[i], len);
+-    }
+-
+-  if (from->ksize)
+-    to->ktypevec = XNEWVEC (char *, from->ksize);
+-
+-  for (i = 0; i < from->numk; i++)
+-    {
+-      int len = strlen (from->ktypevec[i]) + 1;
+-
+-      to->ktypevec[i] = XNEWVEC (char, len);
+-      memcpy (to->ktypevec[i], from->ktypevec[i], len);
+-    }
+-
+-  if (from->bsize)
+-    to->btypevec = XNEWVEC (char *, from->bsize);
+-
+-  for (i = 0; i < from->numb; i++)
+-    {
+-      int len = strlen (from->btypevec[i]) + 1;
+-
+-      to->btypevec[i] = XNEWVEC (char , len);
+-      memcpy (to->btypevec[i], from->btypevec[i], len);
+-    }
+-
+-  if (from->proctypevec)
+-    to->proctypevec =
+-      XDUPVEC (int, from->proctypevec, from->proctypevec_size);
+-
+-  if (from->ntmpl_args)
+-    to->tmpl_argvec = XNEWVEC (char *, from->ntmpl_args);
+-
+-  for (i = 0; i < from->ntmpl_args; i++)
+-    {
+-      int len = strlen (from->tmpl_argvec[i]) + 1;
+-
+-      to->tmpl_argvec[i] = XNEWVEC (char, len);
+-      memcpy (to->tmpl_argvec[i], from->tmpl_argvec[i], len);
+-    }
+-
+-  if (from->previous_argument)
+-    {
+-      to->previous_argument = XNEW (string);
+-      string_init (to->previous_argument);
+-      string_appends (to->previous_argument, from->previous_argument);
+-    }
+-}
+-
+-
+-/* Delete dynamic stuff in work_stuff that is not to be re-used.  */
+-
+-static void
+-delete_non_B_K_work_stuff (struct work_stuff *work)
+-{
+-  /* Discard the remembered types, if any.  */
+-
+-  forget_types (work);
+-  if (work->typevec != NULL)
+-    {
+-      free ((char *) work->typevec);
+-      work->typevec = NULL;
+-      work->typevec_size = 0;
+-    }
+-  if (work->proctypevec != NULL)
+-    {
+-      free (work->proctypevec);
+-      work->proctypevec = NULL;
+-      work->proctypevec_size = 0;
+-    }
+-  if (work->tmpl_argvec)
+-    {
+-      int i;
+-
+-      for (i = 0; i < work->ntmpl_args; i++)
+-	free ((char*) work->tmpl_argvec[i]);
+-
+-      free ((char*) work->tmpl_argvec);
+-      work->tmpl_argvec = NULL;
+-    }
+-  if (work->previous_argument)
+-    {
+-      string_delete (work->previous_argument);
+-      free ((char*) work->previous_argument);
+-      work->previous_argument = NULL;
+-    }
+-}
+-
+-
+-/* Delete all dynamic storage in work_stuff.  */
+-static void
+-delete_work_stuff (struct work_stuff *work)
+-{
+-  delete_non_B_K_work_stuff (work);
+-  squangle_mop_up (work);
+-}
+-
+-
+-/* Clear out any mangled storage */
+-
+-static char *
+-mop_up (struct work_stuff *work, string *declp, int success)
+-{
+-  char *demangled = NULL;
+-
+-  delete_non_B_K_work_stuff (work);
+-
+-  /* If demangling was successful, ensure that the demangled string is null
+-     terminated and return it.  Otherwise, free the demangling decl.  */
+-
+-  if (!success)
+-    {
+-      string_delete (declp);
+-    }
+-  else
+-    {
+-      string_appendn (declp, "", 1);
+-      demangled = declp->b;
+-    }
+-  return (demangled);
+-}
+-
+-/*
+-
+-LOCAL FUNCTION
+-
+-	demangle_signature -- demangle the signature part of a mangled name
+-
+-SYNOPSIS
+-
+-	static int
+-	demangle_signature (struct work_stuff *work, const char **mangled,
+-			    string *declp);
+-
+-DESCRIPTION
+-
+-	Consume and demangle the signature portion of the mangled name.
+-
+-	DECLP is the string where demangled output is being built.  At
+-	entry it contains the demangled root name from the mangled name
+-	prefix.  I.E. either a demangled operator name or the root function
+-	name.  In some special cases, it may contain nothing.
+-
+-	*MANGLED points to the current unconsumed location in the mangled
+-	name.  As tokens are consumed and demangling is performed, the
+-	pointer is updated to continuously point at the next token to
+-	be consumed.
+-
+-	Demangling GNU style mangled names is nasty because there is no
+-	explicit token that marks the start of the outermost function
+-	argument list.  */
+-
+-static int
+-demangle_signature (struct work_stuff *work,
+-                    const char **mangled, string *declp)
+-{
+-  int success = 1;
+-  int func_done = 0;
+-  int expect_func = 0;
+-  int expect_return_type = 0;
+-  const char *oldmangled = NULL;
+-  string trawname;
+-  string tname;
+-
+-  while (success && (**mangled != '\0'))
+-    {
+-      switch (**mangled)
+-	{
+-	case 'Q':
+-	  oldmangled = *mangled;
+-	  success = demangle_qualified (work, mangled, declp, 1, 0);
+-	  if (success)
+-	    remember_type (work, oldmangled, *mangled - oldmangled);
+-	  if (AUTO_DEMANGLING || GNU_DEMANGLING)
+-	    expect_func = 1;
+-	  oldmangled = NULL;
+-	  break;
+-
+-        case 'K':
+-	  oldmangled = *mangled;
+-	  success = demangle_qualified (work, mangled, declp, 1, 0);
+-	  if (AUTO_DEMANGLING || GNU_DEMANGLING)
+-	    {
+-	      expect_func = 1;
+-	    }
+-	  oldmangled = NULL;
+-	  break;
+-
+-	case 'S':
+-	  /* Static member function */
+-	  if (oldmangled == NULL)
+-	    {
+-	      oldmangled = *mangled;
+-	    }
+-	  (*mangled)++;
+-	  work -> static_type = 1;
+-	  break;
+-
+-	case 'C':
+-	case 'V':
+-	case 'u':
+-	  work->type_quals |= code_for_qualifier (**mangled);
+-
+-	  /* a qualified member function */
+-	  if (oldmangled == NULL)
+-	    oldmangled = *mangled;
+-	  (*mangled)++;
+-	  break;
+-
+-	case 'L':
+-	  /* Local class name follows after "Lnnn_" */
+-	  if (HP_DEMANGLING)
+-	    {
+-	      while (**mangled && (**mangled != '_'))
+-		(*mangled)++;
+-	      if (!**mangled)
+-		success = 0;
+-	      else
+-		(*mangled)++;
+-	    }
+-	  else
+-	    success = 0;
+-	  break;
+-
+-	case '0': case '1': case '2': case '3': case '4':
+-	case '5': case '6': case '7': case '8': case '9':
+-	  if (oldmangled == NULL)
+-	    {
+-	      oldmangled = *mangled;
+-	    }
+-          work->temp_start = -1; /* uppermost call to demangle_class */
+-	  success = demangle_class (work, mangled, declp);
+-	  if (success)
+-	    {
+-	      remember_type (work, oldmangled, *mangled - oldmangled);
+-	    }
+-	  if (AUTO_DEMANGLING || GNU_DEMANGLING || EDG_DEMANGLING)
+-	    {
+-              /* EDG and others will have the "F", so we let the loop cycle
+-                 if we are looking at one. */
+-              if (**mangled != 'F')
+-                 expect_func = 1;
+-	    }
+-	  oldmangled = NULL;
+-	  break;
+-
+-	case 'B':
+-	  {
+-	    string s;
+-	    success = do_type (work, mangled, &s);
+-	    if (success)
+-	      {
+-		string_append (&s, SCOPE_STRING (work));
+-		string_prepends (declp, &s);
+-		string_delete (&s);
+-	      }
+-	    oldmangled = NULL;
+-	    expect_func = 1;
+-	  }
+-	  break;
+-
+-	case 'F':
+-	  /* Function */
+-	  /* ARM/HP style demangling includes a specific 'F' character after
+-	     the class name.  For GNU style, it is just implied.  So we can
+-	     safely just consume any 'F' at this point and be compatible
+-	     with either style.  */
+-
+-	  oldmangled = NULL;
+-	  func_done = 1;
+-	  (*mangled)++;
+-
+-	  /* For lucid/ARM/HP style we have to forget any types we might
+-	     have remembered up to this point, since they were not argument
+-	     types.  GNU style considers all types seen as available for
+-	     back references.  See comment in demangle_args() */
+-
+-	  if (LUCID_DEMANGLING || ARM_DEMANGLING || HP_DEMANGLING || EDG_DEMANGLING)
+-	    {
+-	      forget_types (work);
+-	    }
+-	  success = demangle_args (work, mangled, declp);
+-	  /* After picking off the function args, we expect to either
+-	     find the function return type (preceded by an '_') or the
+-	     end of the string. */
+-	  if (success && (AUTO_DEMANGLING || EDG_DEMANGLING) && **mangled == '_')
+-	    {
+-	      ++(*mangled);
+-              /* At this level, we do not care about the return type. */
+-              success = do_type (work, mangled, &tname);
+-              string_delete (&tname);
+-            }
+-
+-	  break;
+-
+-	case 't':
+-	  /* G++ Template */
+-	  string_init(&trawname);
+-	  string_init(&tname);
+-	  if (oldmangled == NULL)
+-	    {
+-	      oldmangled = *mangled;
+-	    }
+-	  success = demangle_template (work, mangled, &tname,
+-				       &trawname, 1, 1);
+-	  if (success)
+-	    {
+-	      remember_type (work, oldmangled, *mangled - oldmangled);
+-	    }
+-	  string_append (&tname, SCOPE_STRING (work));
+-
+-	  string_prepends(declp, &tname);
+-	  if (work -> destructor & 1)
+-	    {
+-	      string_prepend (&trawname, "~");
+-	      string_appends (declp, &trawname);
+-	      work->destructor -= 1;
+-	    }
+-	  if ((work->constructor & 1) || (work->destructor & 1))
+-	    {
+-	      string_appends (declp, &trawname);
+-	      work->constructor -= 1;
+-	    }
+-	  string_delete(&trawname);
+-	  string_delete(&tname);
+-	  oldmangled = NULL;
+-	  expect_func = 1;
+-	  break;
+-
+-	case '_':
+-	  if ((AUTO_DEMANGLING || GNU_DEMANGLING) && expect_return_type)
+-	    {
+-	      /* Read the return type. */
+-	      string return_type;
+-
+-	      (*mangled)++;
+-	      success = do_type (work, mangled, &return_type);
+-	      APPEND_BLANK (&return_type);
+-
+-	      string_prepends (declp, &return_type);
+-	      string_delete (&return_type);
+-	      break;
+-	    }
+-	  else
+-	    /* At the outermost level, we cannot have a return type specified,
+-	       so if we run into another '_' at this point we are dealing with
+-	       a mangled name that is either bogus, or has been mangled by
+-	       some algorithm we don't know how to deal with.  So just
+-	       reject the entire demangling.  */
+-            /* However, "_nnn" is an expected suffix for alternate entry point
+-               numbered nnn for a function, with HP aCC, so skip over that
+-               without reporting failure. pai/1997-09-04 */
+-            if (HP_DEMANGLING)
+-              {
+-                (*mangled)++;
+-                while (**mangled && ISDIGIT ((unsigned char)**mangled))
+-                  (*mangled)++;
+-              }
+-            else
+-	      success = 0;
+-	  break;
+-
+-	case 'H':
+-	  if (AUTO_DEMANGLING || GNU_DEMANGLING)
+-	    {
+-	      /* A G++ template function.  Read the template arguments. */
+-	      success = demangle_template (work, mangled, declp, 0, 0,
+-					   0);
+-	      if (!(work->constructor & 1))
+-		expect_return_type = 1;
+-	      if (!**mangled)
+-		success = 0;
+-	      else
+-	        (*mangled)++;
+-	      break;
+-	    }
+-	  /* fall through */
+-
+-	default:
+-	  if (AUTO_DEMANGLING || GNU_DEMANGLING)
+-	    {
+-	      /* Assume we have stumbled onto the first outermost function
+-		 argument token, and start processing args.  */
+-	      func_done = 1;
+-	      success = demangle_args (work, mangled, declp);
+-	    }
+-	  else
+-	    {
+-	      /* Non-GNU demanglers use a specific token to mark the start
+-		 of the outermost function argument tokens.  Typically 'F',
+-		 for ARM/HP-demangling, for example.  So if we find something
+-		 we are not prepared for, it must be an error.  */
+-	      success = 0;
+-	    }
+-	  break;
+-	}
+-      /*
+-	if (AUTO_DEMANGLING || GNU_DEMANGLING)
+-	*/
+-      {
+-	if (success && expect_func)
+-	  {
+-	    func_done = 1;
+-              if (LUCID_DEMANGLING || ARM_DEMANGLING || EDG_DEMANGLING)
+-                {
+-                  forget_types (work);
+-                }
+-	    success = demangle_args (work, mangled, declp);
+-	    /* Since template include the mangling of their return types,
+-	       we must set expect_func to 0 so that we don't try do
+-	       demangle more arguments the next time we get here.  */
+-	    expect_func = 0;
+-	  }
+-      }
+-    }
+-  if (success && !func_done)
+-    {
+-      if (AUTO_DEMANGLING || GNU_DEMANGLING)
+-	{
+-	  /* With GNU style demangling, bar__3foo is 'foo::bar(void)', and
+-	     bar__3fooi is 'foo::bar(int)'.  We get here when we find the
+-	     first case, and need to ensure that the '(void)' gets added to
+-	     the current declp.  Note that with ARM/HP, the first case
+-	     represents the name of a static data member 'foo::bar',
+-	     which is in the current declp, so we leave it alone.  */
+-	  success = demangle_args (work, mangled, declp);
+-	}
+-    }
+-  if (success && PRINT_ARG_TYPES)
+-    {
+-      if (work->static_type)
+-	string_append (declp, " static");
+-      if (work->type_quals != TYPE_UNQUALIFIED)
+-	{
+-	  APPEND_BLANK (declp);
+-	  string_append (declp, qualifier_string (work->type_quals));
+-	}
+-    }
+-
+-  return (success);
+-}
+-
+-#if 0
+-
+-static int
+-demangle_method_args (struct work_stuff *work, const char **mangled,
+-                      string *declp)
+-{
+-  int success = 0;
+-
+-  if (work -> static_type)
+-    {
+-      string_append (declp, *mangled + 1);
+-      *mangled += strlen (*mangled);
+-      success = 1;
+-    }
+-  else
+-    {
+-      success = demangle_args (work, mangled, declp);
+-    }
+-  return (success);
+-}
+-
+-#endif
+-
+-static int
+-demangle_template_template_parm (struct work_stuff *work,
+-                                 const char **mangled, string *tname)
+-{
+-  int i;
+-  int r;
+-  int need_comma = 0;
+-  int success = 1;
+-  string temp;
+-
+-  string_append (tname, "template <");
+-  /* get size of template parameter list */
+-  if (get_count (mangled, &r))
+-    {
+-      for (i = 0; i < r; i++)
+-	{
+-	  if (need_comma)
+-	    {
+-	      string_append (tname, ", ");
+-	    }
+-
+-	    /* Z for type parameters */
+-	    if (**mangled == 'Z')
+-	      {
+-		(*mangled)++;
+-		string_append (tname, "class");
+-	      }
+-	      /* z for template parameters */
+-	    else if (**mangled == 'z')
+-	      {
+-		(*mangled)++;
+-		success =
+-		  demangle_template_template_parm (work, mangled, tname);
+-		if (!success)
+-		  {
+-		    break;
+-		  }
+-	      }
+-	    else
+-	      {
+-		/* temp is initialized in do_type */
+-		success = do_type (work, mangled, &temp);
+-		if (success)
+-		  {
+-		    string_appends (tname, &temp);
+-		  }
+-		string_delete(&temp);
+-		if (!success)
+-		  {
+-		    break;
+-		  }
+-	      }
+-	  need_comma = 1;
+-	}
+-
+-    }
+-  if (tname->p[-1] == '>')
+-    string_append (tname, " ");
+-  string_append (tname, "> class");
+-  return (success);
+-}
+-
+-static int
+-demangle_expression (struct work_stuff *work, const char **mangled,
+-                     string *s, type_kind_t tk)
+-{
+-  int need_operator = 0;
+-  int success;
+-
+-  success = 1;
+-  string_appendn (s, "(", 1);
+-  (*mangled)++;
+-  while (success && **mangled != 'W' && **mangled != '\0')
+-    {
+-      if (need_operator)
+-	{
+-	  size_t i;
+-	  size_t len;
+-
+-	  success = 0;
+-
+-	  len = strlen (*mangled);
+-
+-	  for (i = 0; i < ARRAY_SIZE (optable); ++i)
+-	    {
+-	      size_t l = strlen (optable[i].in);
+-
+-	      if (l <= len
+-		  && memcmp (optable[i].in, *mangled, l) == 0)
+-		{
+-		  string_appendn (s, " ", 1);
+-		  string_append (s, optable[i].out);
+-		  string_appendn (s, " ", 1);
+-		  success = 1;
+-		  (*mangled) += l;
+-		  break;
+-		}
+-	    }
+-
+-	  if (!success)
+-	    break;
+-	}
+-      else
+-	need_operator = 1;
+-
+-      success = demangle_template_value_parm (work, mangled, s, tk);
+-    }
+-
+-  if (**mangled != 'W')
+-    success = 0;
+-  else
+-    {
+-      string_appendn (s, ")", 1);
+-      (*mangled)++;
+-    }
+-
+-  return success;
+-}
+-
+-static int
+-demangle_integral_value (struct work_stuff *work,
+-                         const char **mangled, string *s)
+-{
+-  int success;
+-
+-  if (**mangled == 'E')
+-    success = demangle_expression (work, mangled, s, tk_integral);
+-  else if (**mangled == 'Q' || **mangled == 'K')
+-    success = demangle_qualified (work, mangled, s, 0, 1);
+-  else
+-    {
+-      int value;
+-
+-      /* By default, we let the number decide whether we shall consume an
+-	 underscore.  */
+-      int multidigit_without_leading_underscore = 0;
+-      int leave_following_underscore = 0;
+-
+-      success = 0;
+-
+-      if (**mangled == '_')
+-        {
+-	  if (mangled[0][1] == 'm')
+-	    {
+-	      /* Since consume_count_with_underscores does not handle the
+-		 `m'-prefix we must do it here, using consume_count and
+-		 adjusting underscores: we have to consume the underscore
+-		 matching the prepended one.  */
+-	      multidigit_without_leading_underscore = 1;
+-	      string_appendn (s, "-", 1);
+-	      (*mangled) += 2;
+-	    }
+-	  else
+-	    {
+-	      /* Do not consume a following underscore;
+-	         consume_count_with_underscores will consume what
+-	         should be consumed.  */
+-	      leave_following_underscore = 1;
+-	    }
+-	}
+-      else
+-	{
+-	  /* Negative numbers are indicated with a leading `m'.  */
+-	  if (**mangled == 'm')
+-	  {
+-	    string_appendn (s, "-", 1);
+-	    (*mangled)++;
+-	  }
+-	  /* Since consume_count_with_underscores does not handle
+-	     multi-digit numbers that do not start with an underscore,
+-	     and this number can be an integer template parameter,
+-	     we have to call consume_count. */
+-	  multidigit_without_leading_underscore = 1;
+-	  /* These multi-digit numbers never end on an underscore,
+-	     so if there is one then don't eat it. */
+-	  leave_following_underscore = 1;
+-	}
+-
+-      /* We must call consume_count if we expect to remove a trailing
+-	 underscore, since consume_count_with_underscores expects
+-	 the leading underscore (that we consumed) if it is to handle
+-	 multi-digit numbers.  */
+-      if (multidigit_without_leading_underscore)
+-	value = consume_count (mangled);
+-      else
+-	value = consume_count_with_underscores (mangled);
+-
+-      if (value != -1)
+-	{
+-	  char buf[INTBUF_SIZE];
+-	  sprintf (buf, "%d", value);
+-	  string_append (s, buf);
+-
+-	  /* Numbers not otherwise delimited, might have an underscore
+-	     appended as a delimeter, which we should skip.
+-
+-	     ??? This used to always remove a following underscore, which
+-	     is wrong.  If other (arbitrary) cases are followed by an
+-	     underscore, we need to do something more radical.  */
+-
+-	  if ((value > 9 || multidigit_without_leading_underscore)
+-	      && ! leave_following_underscore
+-	      && **mangled == '_')
+-	    (*mangled)++;
+-
+-	  /* All is well.  */
+-	  success = 1;
+-	}
+-      }
+-
+-  return success;
+-}
+-
+-/* Demangle the real value in MANGLED.  */
+-
+-static int
+-demangle_real_value (struct work_stuff *work,
+-                     const char **mangled, string *s)
+-{
+-  if (**mangled == 'E')
+-    return demangle_expression (work, mangled, s, tk_real);
+-
+-  if (**mangled == 'm')
+-    {
+-      string_appendn (s, "-", 1);
+-      (*mangled)++;
+-    }
+-  while (ISDIGIT ((unsigned char)**mangled))
+-    {
+-      string_appendn (s, *mangled, 1);
+-      (*mangled)++;
+-    }
+-  if (**mangled == '.') /* fraction */
+-    {
+-      string_appendn (s, ".", 1);
+-      (*mangled)++;
+-      while (ISDIGIT ((unsigned char)**mangled))
+-	{
+-	  string_appendn (s, *mangled, 1);
+-	  (*mangled)++;
+-	}
+-    }
+-  if (**mangled == 'e') /* exponent */
+-    {
+-      string_appendn (s, "e", 1);
+-      (*mangled)++;
+-      while (ISDIGIT ((unsigned char)**mangled))
+-	{
+-	  string_appendn (s, *mangled, 1);
+-	  (*mangled)++;
+-	}
+-    }
+-
+-  return 1;
+-}
+-
+-static int
+-demangle_template_value_parm (struct work_stuff *work, const char **mangled,
+-                              string *s, type_kind_t tk)
+-{
+-  int success = 1;
+-
+-  if (**mangled == 'Y')
+-    {
+-      /* The next argument is a template parameter. */
+-      int idx;
+-
+-      (*mangled)++;
+-      idx = consume_count_with_underscores (mangled);
+-      if (idx == -1
+-	  || (work->tmpl_argvec && idx >= work->ntmpl_args)
+-	  || consume_count_with_underscores (mangled) == -1)
+-	return -1;
+-      if (work->tmpl_argvec)
+-	string_append (s, work->tmpl_argvec[idx]);
+-      else
+-	string_append_template_idx (s, idx);
+-    }
+-  else if (tk == tk_integral)
+-    success = demangle_integral_value (work, mangled, s);
+-  else if (tk == tk_char)
+-    {
+-      char tmp[2];
+-      int val;
+-      if (**mangled == 'm')
+-	{
+-	  string_appendn (s, "-", 1);
+-	  (*mangled)++;
+-	}
+-      string_appendn (s, "'", 1);
+-      val = consume_count(mangled);
+-      if (val <= 0)
+-	success = 0;
+-      else
+-	{
+-	  tmp[0] = (char)val;
+-	  tmp[1] = '\0';
+-	  string_appendn (s, &tmp[0], 1);
+-	  string_appendn (s, "'", 1);
+-	}
+-    }
+-  else if (tk == tk_bool)
+-    {
+-      int val = consume_count (mangled);
+-      if (val == 0)
+-	string_appendn (s, "false", 5);
+-      else if (val == 1)
+-	string_appendn (s, "true", 4);
+-      else
+-	success = 0;
+-    }
+-  else if (tk == tk_real)
+-    success = demangle_real_value (work, mangled, s);
+-  else if (tk == tk_pointer || tk == tk_reference
+-	   || tk == tk_rvalue_reference)
+-    {
+-      if (**mangled == 'Q')
+-	success = demangle_qualified (work, mangled, s,
+-				      /*isfuncname=*/0, 
+-				      /*append=*/1);
+-      else
+-	{
+-	  int symbol_len  = consume_count (mangled);
+-	  if (symbol_len == -1
+-	      || symbol_len > (long) strlen (*mangled))
+-	    return -1;
+-	  if (symbol_len == 0)
+-	    string_appendn (s, "0", 1);
+-	  else
+-	    {
+-	      char *p = XNEWVEC (char, symbol_len + 1), *q;
+-	      strncpy (p, *mangled, symbol_len);
+-	      p [symbol_len] = '\0';
+-	      /* We use cplus_demangle here, rather than
+-		 internal_cplus_demangle, because the name of the entity
+-		 mangled here does not make use of any of the squangling
+-		 or type-code information we have built up thus far; it is
+-		 mangled independently.  */
+-	      q = cplus_demangle (p, work->options);
+-	      if (tk == tk_pointer)
+-		string_appendn (s, "&", 1);
+-	      /* FIXME: Pointer-to-member constants should get a
+-		 qualifying class name here.  */
+-	      if (q)
+-		{
+-		  string_append (s, q);
+-		  free (q);
+-		}
+-	      else
+-		string_append (s, p);
+-	      free (p);
+-	    }
+-	  *mangled += symbol_len;
+-	}
+-    }
+-
+-  return success;
+-}
+-
+-/* Demangle the template name in MANGLED.  The full name of the
+-   template (e.g., S<int>) is placed in TNAME.  The name without the
+-   template parameters (e.g. S) is placed in TRAWNAME if TRAWNAME is
+-   non-NULL.  If IS_TYPE is nonzero, this template is a type template,
+-   not a function template.  If both IS_TYPE and REMEMBER are nonzero,
+-   the template is remembered in the list of back-referenceable
+-   types.  */
+-
+-static int
+-demangle_template (struct work_stuff *work, const char **mangled,
+-                   string *tname, string *trawname,
+-                   int is_type, int remember)
+-{
+-  int i;
+-  int r;
+-  int need_comma = 0;
+-  int success = 0;
+-  int is_java_array = 0;
+-  string temp;
+-
+-  (*mangled)++;
+-  if (is_type)
+-    {
+-      /* get template name */
+-      if (**mangled == 'z')
+-	{
+-	  int idx;
+-	  (*mangled)++;
+-	  if (**mangled == '\0')
+-	    return (0);
+-	  (*mangled)++;
+-
+-	  idx = consume_count_with_underscores (mangled);
+-	  if (idx == -1
+-	      || (work->tmpl_argvec && idx >= work->ntmpl_args)
+-	      || consume_count_with_underscores (mangled) == -1)
+-	    return (0);
+-
+-	  if (work->tmpl_argvec)
+-	    {
+-	      string_append (tname, work->tmpl_argvec[idx]);
+-	      if (trawname)
+-		string_append (trawname, work->tmpl_argvec[idx]);
+-	    }
+-	  else
+-	    {
+-	      string_append_template_idx (tname, idx);
+-	      if (trawname)
+-		string_append_template_idx (trawname, idx);
+-	    }
+-	}
+-      else
+-	{
+-	  if ((r = consume_count (mangled)) <= 0
+-	      || (int) strlen (*mangled) < r)
+-	    {
+-	      return (0);
+-	    }
+-	  is_java_array = (work -> options & DMGL_JAVA)
+-	    && strncmp (*mangled, "JArray1Z", 8) == 0;
+-	  if (! is_java_array)
+-	    {
+-	      string_appendn (tname, *mangled, r);
+-	    }
+-	  if (trawname)
+-	    string_appendn (trawname, *mangled, r);
+-	  *mangled += r;
+-	}
+-    }
+-  if (!is_java_array)
+-    string_append (tname, "<");
+-  /* get size of template parameter list */
+-  if (!get_count (mangled, &r))
+-    {
+-      return (0);
+-    }
+-  if (!is_type)
+-    {
+-      /* Create an array for saving the template argument values. */
+-      work->tmpl_argvec = XNEWVEC (char *, r);
+-      work->ntmpl_args = r;
+-      for (i = 0; i < r; i++)
+-	work->tmpl_argvec[i] = 0;
+-    }
+-  for (i = 0; i < r; i++)
+-    {
+-      if (need_comma)
+-	{
+-	  string_append (tname, ", ");
+-	}
+-      /* Z for type parameters */
+-      if (**mangled == 'Z')
+-	{
+-	  (*mangled)++;
+-	  /* temp is initialized in do_type */
+-	  success = do_type (work, mangled, &temp);
+-	  if (success)
+-	    {
+-	      string_appends (tname, &temp);
+-
+-	      if (!is_type)
+-		{
+-		  /* Save the template argument. */
+-		  int len = temp.p - temp.b;
+-		  work->tmpl_argvec[i] = XNEWVEC (char, len + 1);
+-		  memcpy (work->tmpl_argvec[i], temp.b, len);
+-		  work->tmpl_argvec[i][len] = '\0';
+-		}
+-	    }
+-	  string_delete(&temp);
+-	  if (!success)
+-	    {
+-	      break;
+-	    }
+-	}
+-      /* z for template parameters */
+-      else if (**mangled == 'z')
+-	{
+-	  int r2;
+-	  (*mangled)++;
+-	  success = demangle_template_template_parm (work, mangled, tname);
+-
+-	  if (success
+-	      && (r2 = consume_count (mangled)) > 0
+-	      && (int) strlen (*mangled) >= r2)
+-	    {
+-	      string_append (tname, " ");
+-	      string_appendn (tname, *mangled, r2);
+-	      if (!is_type)
+-		{
+-		  /* Save the template argument. */
+-		  int len = r2;
+-		  work->tmpl_argvec[i] = XNEWVEC (char, len + 1);
+-		  memcpy (work->tmpl_argvec[i], *mangled, len);
+-		  work->tmpl_argvec[i][len] = '\0';
+-		}
+-	      *mangled += r2;
+-	    }
+-	  if (!success)
+-	    {
+-	      break;
+-	    }
+-	}
+-      else
+-	{
+-	  string  param;
+-	  string* s;
+-
+-	  /* otherwise, value parameter */
+-
+-	  /* temp is initialized in do_type */
+-	  success = do_type (work, mangled, &temp);
+-	  string_delete(&temp);
+-	  if (!success)
+-	    break;
+-
+-	  if (!is_type)
+-	    {
+-	      s = &param;
+-	      string_init (s);
+-	    }
+-	  else
+-	    s = tname;
+-
+-	  success = demangle_template_value_parm (work, mangled, s,
+-						  (type_kind_t) success);
+-
+-	  if (!success)
+-	    {
+-	      if (!is_type)
+-		string_delete (s);
+-	      success = 0;
+-	      break;
+-	    }
+-
+-	  if (!is_type)
+-	    {
+-	      int len = s->p - s->b;
+-	      work->tmpl_argvec[i] = XNEWVEC (char, len + 1);
+-	      memcpy (work->tmpl_argvec[i], s->b, len);
+-	      work->tmpl_argvec[i][len] = '\0';
+-
+-	      string_appends (tname, s);
+-	      string_delete (s);
+-	    }
+-	}
+-      need_comma = 1;
+-    }
+-  if (is_java_array)
+-    {
+-      string_append (tname, "[]");
+-    }
+-  else
+-    {
+-      if (tname->p[-1] == '>')
+-	string_append (tname, " ");
+-      string_append (tname, ">");
+-    }
+-
+-  if (is_type && remember)
+-    {
+-      const int bindex = register_Btype (work);
+-      remember_Btype (work, tname->b, LEN_STRING (tname), bindex);
+-    }
+-
+-  /*
+-    if (work -> static_type)
+-    {
+-    string_append (declp, *mangled + 1);
+-    *mangled += strlen (*mangled);
+-    success = 1;
+-    }
+-    else
+-    {
+-    success = demangle_args (work, mangled, declp);
+-    }
+-    }
+-    */
+-  return (success);
+-}
+-
+-static int
+-arm_pt (struct work_stuff *work, const char *mangled,
+-        int n, const char **anchor, const char **args)
+-{
+-  /* Check if ARM template with "__pt__" in it ("parameterized type") */
+-  /* Allow HP also here, because HP's cfront compiler follows ARM to some extent */
+-  if ((ARM_DEMANGLING || HP_DEMANGLING) && (*anchor = strstr (mangled, "__pt__")))
+-    {
+-      int len;
+-      *args = *anchor + 6;
+-      len = consume_count (args);
+-      if (len == -1)
+-	return 0;
+-      if (*args + len == mangled + n && **args == '_')
+-	{
+-	  ++*args;
+-	  return 1;
+-	}
+-    }
+-  if (AUTO_DEMANGLING || EDG_DEMANGLING)
+-    {
+-      if ((*anchor = strstr (mangled, "__tm__"))
+-          || (*anchor = strstr (mangled, "__ps__"))
+-          || (*anchor = strstr (mangled, "__pt__")))
+-        {
+-          int len;
+-          *args = *anchor + 6;
+-          len = consume_count (args);
+-	  if (len == -1)
+-	    return 0;
+-          if (*args + len == mangled + n && **args == '_')
+-            {
+-              ++*args;
+-              return 1;
+-            }
+-        }
+-      else if ((*anchor = strstr (mangled, "__S")))
+-        {
+- 	  int len;
+- 	  *args = *anchor + 3;
+- 	  len = consume_count (args);
+-	  if (len == -1)
+-	    return 0;
+- 	  if (*args + len == mangled + n && **args == '_')
+-            {
+-              ++*args;
+- 	      return 1;
+-            }
+-        }
+-    }
+-
+-  return 0;
+-}
+-
+-static void
+-demangle_arm_hp_template (struct work_stuff *work, const char **mangled,
+-                          int n, string *declp)
+-{
+-  const char *p;
+-  const char *args;
+-  const char *e = *mangled + n;
+-  string arg;
+-
+-  /* Check for HP aCC template spec: classXt1t2 where t1, t2 are
+-     template args */
+-  if (HP_DEMANGLING && ((*mangled)[n] == 'X'))
+-    {
+-      char *start_spec_args = NULL;
+-      int hold_options;
+-
+-      /* First check for and omit template specialization pseudo-arguments,
+-         such as in "Spec<#1,#1.*>" */
+-      start_spec_args = strchr (*mangled, '<');
+-      if (start_spec_args && (start_spec_args - *mangled < n))
+-        string_appendn (declp, *mangled, start_spec_args - *mangled);
+-      else
+-        string_appendn (declp, *mangled, n);
+-      (*mangled) += n + 1;
+-      string_init (&arg);
+-      if (work->temp_start == -1) /* non-recursive call */
+-        work->temp_start = declp->p - declp->b;
+-
+-      /* We want to unconditionally demangle parameter types in
+-	 template parameters.  */
+-      hold_options = work->options;
+-      work->options |= DMGL_PARAMS;
+-
+-      string_append (declp, "<");
+-      while (1)
+-        {
+-          string_delete (&arg);
+-          switch (**mangled)
+-            {
+-              case 'T':
+-                /* 'T' signals a type parameter */
+-                (*mangled)++;
+-                if (!do_type (work, mangled, &arg))
+-                  goto hpacc_template_args_done;
+-                break;
+-
+-              case 'U':
+-              case 'S':
+-                /* 'U' or 'S' signals an integral value */
+-                if (!do_hpacc_template_const_value (work, mangled, &arg))
+-                  goto hpacc_template_args_done;
+-                break;
+-
+-              case 'A':
+-                /* 'A' signals a named constant expression (literal) */
+-                if (!do_hpacc_template_literal (work, mangled, &arg))
+-                  goto hpacc_template_args_done;
+-                break;
+-
+-              default:
+-                /* Today, 1997-09-03, we have only the above types
+-                   of template parameters */
+-                /* FIXME: maybe this should fail and return null */
+-                goto hpacc_template_args_done;
+-            }
+-          string_appends (declp, &arg);
+-         /* Check if we're at the end of template args.
+-             0 if at end of static member of template class,
+-             _ if done with template args for a function */
+-          if ((**mangled == '\000') || (**mangled == '_'))
+-            break;
+-          else
+-            string_append (declp, ",");
+-        }
+-    hpacc_template_args_done:
+-      string_append (declp, ">");
+-      string_delete (&arg);
+-      if (**mangled == '_')
+-        (*mangled)++;
+-      work->options = hold_options;
+-      return;
+-    }
+-  /* ARM template? (Also handles HP cfront extensions) */
+-  else if (arm_pt (work, *mangled, n, &p, &args))
+-    {
+-      int hold_options;
+-      string type_str;
+-
+-      string_init (&arg);
+-      string_appendn (declp, *mangled, p - *mangled);
+-      if (work->temp_start == -1)  /* non-recursive call */
+-	work->temp_start = declp->p - declp->b;
+-
+-      /* We want to unconditionally demangle parameter types in
+-	 template parameters.  */
+-      hold_options = work->options;
+-      work->options |= DMGL_PARAMS;
+-
+-      string_append (declp, "<");
+-      /* should do error checking here */
+-      while (args < e) {
+-	string_delete (&arg);
+-
+-	/* Check for type or literal here */
+-	switch (*args)
+-	  {
+-	    /* HP cfront extensions to ARM for template args */
+-	    /* spec: Xt1Lv1 where t1 is a type, v1 is a literal value */
+-	    /* FIXME: We handle only numeric literals for HP cfront */
+-          case 'X':
+-            /* A typed constant value follows */
+-            args++;
+-            if (!do_type (work, &args, &type_str))
+-	      goto cfront_template_args_done;
+-            string_append (&arg, "(");
+-            string_appends (&arg, &type_str);
+-            string_delete (&type_str);
+-            string_append (&arg, ")");
+-            if (*args != 'L')
+-              goto cfront_template_args_done;
+-            args++;
+-            /* Now snarf a literal value following 'L' */
+-            if (!snarf_numeric_literal (&args, &arg))
+-	      goto cfront_template_args_done;
+-            break;
+-
+-          case 'L':
+-            /* Snarf a literal following 'L' */
+-            args++;
+-            if (!snarf_numeric_literal (&args, &arg))
+-	      goto cfront_template_args_done;
+-            break;
+-          default:
+-            /* Not handling other HP cfront stuff */
+-            {
+-              const char* old_args = args;
+-              if (!do_type (work, &args, &arg))
+-                goto cfront_template_args_done;
+-
+-              /* Fail if we didn't make any progress: prevent infinite loop. */
+-              if (args == old_args)
+-		{
+-		  work->options = hold_options;
+-		  return;
+-		}
+-            }
+-	  }
+-	string_appends (declp, &arg);
+-	string_append (declp, ",");
+-      }
+-    cfront_template_args_done:
+-      string_delete (&arg);
+-      if (args >= e)
+-	--declp->p; /* remove extra comma */
+-      string_append (declp, ">");
+-      work->options = hold_options;
+-    }
+-  else if (n>10 && strncmp (*mangled, "_GLOBAL_", 8) == 0
+-	   && (*mangled)[9] == 'N'
+-	   && (*mangled)[8] == (*mangled)[10]
+-	   && strchr (cplus_markers, (*mangled)[8]))
+-    {
+-      /* A member of the anonymous namespace.  */
+-      string_append (declp, "{anonymous}");
+-    }
+-  else
+-    {
+-      if (work->temp_start == -1) /* non-recursive call only */
+-	work->temp_start = 0;     /* disable in recursive calls */
+-      string_appendn (declp, *mangled, n);
+-    }
+-  *mangled += n;
+-}
+-
+-/* Extract a class name, possibly a template with arguments, from the
+-   mangled string; qualifiers, local class indicators, etc. have
+-   already been dealt with */
+-
+-static int
+-demangle_class_name (struct work_stuff *work, const char **mangled,
+-                     string *declp)
+-{
+-  int n;
+-  int success = 0;
+-
+-  n = consume_count (mangled);
+-  if (n == -1)
+-    return 0;
+-  if ((int) strlen (*mangled) >= n)
+-    {
+-      demangle_arm_hp_template (work, mangled, n, declp);
+-      success = 1;
+-    }
+-
+-  return (success);
+-}
+-
+-/*
+-
+-LOCAL FUNCTION
+-
+-	demangle_class -- demangle a mangled class sequence
+-
+-SYNOPSIS
+-
+-	static int
+-	demangle_class (struct work_stuff *work, const char **mangled,
+-			strint *declp)
+-
+-DESCRIPTION
+-
+-	DECLP points to the buffer into which demangling is being done.
+-
+-	*MANGLED points to the current token to be demangled.  On input,
+-	it points to a mangled class (I.E. "3foo", "13verylongclass", etc.)
+-	On exit, it points to the next token after the mangled class on
+-	success, or the first unconsumed token on failure.
+-
+-	If the CONSTRUCTOR or DESTRUCTOR flags are set in WORK, then
+-	we are demangling a constructor or destructor.  In this case
+-	we prepend "class::class" or "class::~class" to DECLP.
+-
+-	Otherwise, we prepend "class::" to the current DECLP.
+-
+-	Reset the constructor/destructor flags once they have been
+-	"consumed".  This allows demangle_class to be called later during
+-	the same demangling, to do normal class demangling.
+-
+-	Returns 1 if demangling is successful, 0 otherwise.
+-
+-*/
+-
+-static int
+-demangle_class (struct work_stuff *work, const char **mangled, string *declp)
+-{
+-  int success = 0;
+-  int btype;
+-  string class_name;
+-  char *save_class_name_end = 0;
+-
+-  string_init (&class_name);
+-  btype = register_Btype (work);
+-  if (demangle_class_name (work, mangled, &class_name))
+-    {
+-      save_class_name_end = class_name.p;
+-      if ((work->constructor & 1) || (work->destructor & 1))
+-	{
+-          /* adjust so we don't include template args */
+-          if (work->temp_start && (work->temp_start != -1))
+-            {
+-              class_name.p = class_name.b + work->temp_start;
+-            }
+-	  string_prepends (declp, &class_name);
+-	  if (work -> destructor & 1)
+-	    {
+-	      string_prepend (declp, "~");
+-              work -> destructor -= 1;
+-	    }
+-	  else
+-	    {
+-	      work -> constructor -= 1;
+-	    }
+-	}
+-      class_name.p = save_class_name_end;
+-      remember_Ktype (work, class_name.b, LEN_STRING(&class_name));
+-      remember_Btype (work, class_name.b, LEN_STRING(&class_name), btype);
+-      string_prepend (declp, SCOPE_STRING (work));
+-      string_prepends (declp, &class_name);
+-      success = 1;
+-    }
+-  string_delete (&class_name);
+-  return (success);
+-}
+-
+-
+-/* Called when there's a "__" in the mangled name, with `scan' pointing to
+-   the rightmost guess.
+-
+-   Find the correct "__"-sequence where the function name ends and the
+-   signature starts, which is ambiguous with GNU mangling.
+-   Call demangle_signature here, so we can make sure we found the right
+-   one; *mangled will be consumed so caller will not make further calls to
+-   demangle_signature.  */
+-
+-static int
+-iterate_demangle_function (struct work_stuff *work, const char **mangled,
+-                           string *declp, const char *scan)
+-{
+-  const char *mangle_init = *mangled;
+-  int success = 0;
+-  string decl_init;
+-  struct work_stuff work_init;
+-
+-  if (*(scan + 2) == '\0')
+-    return 0;
+-
+-  /* Do not iterate for some demangling modes, or if there's only one
+-     "__"-sequence.  This is the normal case.  */
+-  if (ARM_DEMANGLING || LUCID_DEMANGLING || HP_DEMANGLING || EDG_DEMANGLING
+-      || strstr (scan + 2, "__") == NULL)
+-    return demangle_function_name (work, mangled, declp, scan);
+-
+-  /* Save state so we can restart if the guess at the correct "__" was
+-     wrong.  */
+-  string_init (&decl_init);
+-  string_appends (&decl_init, declp);
+-  memset (&work_init, 0, sizeof work_init);
+-  work_stuff_copy_to_from (&work_init, work);
+-
+-  /* Iterate over occurrences of __, allowing names and types to have a
+-     "__" sequence in them.  We must start with the first (not the last)
+-     occurrence, since "__" most often occur between independent mangled
+-     parts, hence starting at the last occurence inside a signature
+-     might get us a "successful" demangling of the signature.  */
+-
+-  while (scan[2])
+-    {
+-      if (demangle_function_name (work, mangled, declp, scan))
+-	{
+-	  success = demangle_signature (work, mangled, declp);
+-	  if (success)
+-	    break;
+-	}
+-
+-      /* Reset demangle state for the next round.  */
+-      *mangled = mangle_init;
+-      string_clear (declp);
+-      string_appends (declp, &decl_init);
+-      work_stuff_copy_to_from (work, &work_init);
+-
+-      /* Leave this underscore-sequence.  */
+-      scan += 2;
+-
+-      /* Scan for the next "__" sequence.  */
+-      while (*scan && (scan[0] != '_' || scan[1] != '_'))
+-	scan++;
+-
+-      /* Move to last "__" in this sequence.  */
+-      while (*scan && *scan == '_')
+-	scan++;
+-      scan -= 2;
+-    }
+-
+-  /* Delete saved state.  */
+-  delete_work_stuff (&work_init);
+-  string_delete (&decl_init);
+-
+-  return success;
+-}
+-
+-/*
+-
+-LOCAL FUNCTION
+-
+-	demangle_prefix -- consume the mangled name prefix and find signature
+-
+-SYNOPSIS
+-
+-	static int
+-	demangle_prefix (struct work_stuff *work, const char **mangled,
+-			 string *declp);
+-
+-DESCRIPTION
+-
+-	Consume and demangle the prefix of the mangled name.
+-	While processing the function name root, arrange to call
+-	demangle_signature if the root is ambiguous.
+-
+-	DECLP points to the string buffer into which demangled output is
+-	placed.  On entry, the buffer is empty.  On exit it contains
+-	the root function name, the demangled operator name, or in some
+-	special cases either nothing or the completely demangled result.
+-
+-	MANGLED points to the current pointer into the mangled name.  As each
+-	token of the mangled name is consumed, it is updated.  Upon entry
+-	the current mangled name pointer points to the first character of
+-	the mangled name.  Upon exit, it should point to the first character
+-	of the signature if demangling was successful, or to the first
+-	unconsumed character if demangling of the prefix was unsuccessful.
+-
+-	Returns 1 on success, 0 otherwise.
+- */
+-
+-static int
+-demangle_prefix (struct work_stuff *work, const char **mangled,
+-                 string *declp)
+-{
+-  int success = 1;
+-  const char *scan;
+-  int i;
+-
+-  if (strlen(*mangled) > 6
+-      && (strncmp(*mangled, "_imp__", 6) == 0
+-          || strncmp(*mangled, "__imp_", 6) == 0))
+-    {
+-      /* it's a symbol imported from a PE dynamic library. Check for both
+-         new style prefix _imp__ and legacy __imp_ used by older versions
+-	 of dlltool. */
+-      (*mangled) += 6;
+-      work->dllimported = 1;
+-    }
+-  else if (strlen(*mangled) >= 11 && strncmp(*mangled, "_GLOBAL_", 8) == 0)
+-    {
+-      char *marker = strchr (cplus_markers, (*mangled)[8]);
+-      if (marker != NULL && *marker == (*mangled)[10])
+-	{
+-	  if ((*mangled)[9] == 'D')
+-	    {
+-	      /* it's a GNU global destructor to be executed at program exit */
+-	      (*mangled) += 11;
+-	      work->destructor = 2;
+-	      if (gnu_special (work, mangled, declp))
+-		return success;
+-	    }
+-	  else if ((*mangled)[9] == 'I')
+-	    {
+-	      /* it's a GNU global constructor to be executed at program init */
+-	      (*mangled) += 11;
+-	      work->constructor = 2;
+-	      if (gnu_special (work, mangled, declp))
+-		return success;
+-	    }
+-	}
+-    }
+-  else if ((ARM_DEMANGLING || HP_DEMANGLING || EDG_DEMANGLING) && strncmp(*mangled, "__std__", 7) == 0)
+-    {
+-      /* it's a ARM global destructor to be executed at program exit */
+-      (*mangled) += 7;
+-      work->destructor = 2;
+-    }
+-  else if ((ARM_DEMANGLING || HP_DEMANGLING || EDG_DEMANGLING) && strncmp(*mangled, "__sti__", 7) == 0)
+-    {
+-      /* it's a ARM global constructor to be executed at program initial */
+-      (*mangled) += 7;
+-      work->constructor = 2;
+-    }
+-
+-  /*  This block of code is a reduction in strength time optimization
+-      of:
+-      scan = strstr (*mangled, "__"); */
+-
+-  {
+-    scan = *mangled;
+-
+-    do {
+-      scan = strchr (scan, '_');
+-    } while (scan != NULL && *++scan != '_');
+-
+-    if (scan != NULL) --scan;
+-  }
+-
+-  if (scan != NULL)
+-    {
+-      /* We found a sequence of two or more '_', ensure that we start at
+-	 the last pair in the sequence.  */
+-      i = strspn (scan, "_");
+-      if (i > 2)
+-	{
+-	  scan += (i - 2);
+-	}
+-    }
+-
+-  if (scan == NULL)
+-    {
+-      success = 0;
+-    }
+-  else if (work -> static_type)
+-    {
+-      if (!ISDIGIT ((unsigned char)scan[0]) && (scan[0] != 't'))
+-	{
+-	  success = 0;
+-	}
+-    }
+-  else if ((scan == *mangled)
+-	   && (ISDIGIT ((unsigned char)scan[2]) || (scan[2] == 'Q')
+-	       || (scan[2] == 't') || (scan[2] == 'K') || (scan[2] == 'H')))
+-    {
+-      /* The ARM says nothing about the mangling of local variables.
+-	 But cfront mangles local variables by prepending __<nesting_level>
+-	 to them. As an extension to ARM demangling we handle this case.  */
+-      if ((LUCID_DEMANGLING || ARM_DEMANGLING || HP_DEMANGLING)
+-	  && ISDIGIT ((unsigned char)scan[2]))
+-	{
+-	  *mangled = scan + 2;
+-	  consume_count (mangled);
+-	  string_append (declp, *mangled);
+-	  *mangled += strlen (*mangled);
+-	  success = 1;
+-	}
+-      else
+-	{
+-	  /* A GNU style constructor starts with __[0-9Qt].  But cfront uses
+-	     names like __Q2_3foo3bar for nested type names.  So don't accept
+-	     this style of constructor for cfront demangling.  A GNU
+-	     style member-template constructor starts with 'H'. */
+-	  if (!(LUCID_DEMANGLING || ARM_DEMANGLING || HP_DEMANGLING || EDG_DEMANGLING))
+-	    work -> constructor += 1;
+-	  *mangled = scan + 2;
+-	}
+-    }
+-  else if (ARM_DEMANGLING && scan[2] == 'p' && scan[3] == 't')
+-    {
+-      /* Cfront-style parameterized type.  Handled later as a signature. */
+-      success = 1;
+-
+-      /* ARM template? */
+-      demangle_arm_hp_template (work, mangled, strlen (*mangled), declp);
+-    }
+-  else if (EDG_DEMANGLING && ((scan[2] == 't' && scan[3] == 'm')
+-                              || (scan[2] == 'p' && scan[3] == 's')
+-                              || (scan[2] == 'p' && scan[3] == 't')))
+-    {
+-      /* EDG-style parameterized type.  Handled later as a signature. */
+-      success = 1;
+-
+-      /* EDG template? */
+-      demangle_arm_hp_template (work, mangled, strlen (*mangled), declp);
+-    }
+-  else if ((scan == *mangled) && !ISDIGIT ((unsigned char)scan[2])
+-	   && (scan[2] != 't'))
+-    {
+-      /* Mangled name starts with "__".  Skip over any leading '_' characters,
+-	 then find the next "__" that separates the prefix from the signature.
+-	 */
+-      if (!(ARM_DEMANGLING || LUCID_DEMANGLING || HP_DEMANGLING || EDG_DEMANGLING)
+-	  || (arm_special (mangled, declp) == 0))
+-	{
+-	  while (*scan == '_')
+-	    {
+-	      scan++;
+-	    }
+-	  if ((scan = strstr (scan, "__")) == NULL || (*(scan + 2) == '\0'))
+-	    {
+-	      /* No separator (I.E. "__not_mangled"), or empty signature
+-		 (I.E. "__not_mangled_either__") */
+-	      success = 0;
+-	    }
+-	  else
+-	    return iterate_demangle_function (work, mangled, declp, scan);
+-	}
+-    }
+-  else if (*(scan + 2) != '\0')
+-    {
+-      /* Mangled name does not start with "__" but does have one somewhere
+-	 in there with non empty stuff after it.  Looks like a global
+-	 function name.  Iterate over all "__":s until the right
+-	 one is found.  */
+-      return iterate_demangle_function (work, mangled, declp, scan);
+-    }
+-  else
+-    {
+-      /* Doesn't look like a mangled name */
+-      success = 0;
+-    }
+-
+-  if (!success && (work->constructor == 2 || work->destructor == 2))
+-    {
+-      string_append (declp, *mangled);
+-      *mangled += strlen (*mangled);
+-      success = 1;
+-    }
+-  return (success);
+-}
+-
+-/*
+-
+-LOCAL FUNCTION
+-
+-	gnu_special -- special handling of gnu mangled strings
+-
+-SYNOPSIS
+-
+-	static int
+-	gnu_special (struct work_stuff *work, const char **mangled,
+-		     string *declp);
+-
+-
+-DESCRIPTION
+-
+-	Process some special GNU style mangling forms that don't fit
+-	the normal pattern.  For example:
+-
+-		_$_3foo		(destructor for class foo)
+-		_vt$foo		(foo virtual table)
+-		_vt$foo$bar	(foo::bar virtual table)
+-		__vt_foo	(foo virtual table, new style with thunks)
+-		_3foo$varname	(static data member)
+-		_Q22rs2tu$vw	(static data member)
+-		__t6vector1Zii	(constructor with template)
+-		__thunk_4__$_7ostream (virtual function thunk)
+- */
+-
+-static int
+-gnu_special (struct work_stuff *work, const char **mangled, string *declp)
+-{
+-  int n;
+-  int success = 1;
+-  const char *p;
+-
+-  if ((*mangled)[0] == '_' && (*mangled)[1] != '\0'
+-      && strchr (cplus_markers, (*mangled)[1]) != NULL
+-      && (*mangled)[2] == '_')
+-    {
+-      /* Found a GNU style destructor, get past "_<CPLUS_MARKER>_" */
+-      (*mangled) += 3;
+-      work -> destructor += 1;
+-    }
+-  else if ((*mangled)[0] == '_'
+-	   && (((*mangled)[1] == '_'
+-		&& (*mangled)[2] == 'v'
+-		&& (*mangled)[3] == 't'
+-		&& (*mangled)[4] == '_')
+-	       || ((*mangled)[1] == 'v'
+-		   && (*mangled)[2] == 't' && (*mangled)[3] != '\0'
+-		   && strchr (cplus_markers, (*mangled)[3]) != NULL)))
+-    {
+-      /* Found a GNU style virtual table, get past "_vt<CPLUS_MARKER>"
+-         and create the decl.  Note that we consume the entire mangled
+-	 input string, which means that demangle_signature has no work
+-	 to do.  */
+-      if ((*mangled)[2] == 'v')
+-	(*mangled) += 5; /* New style, with thunks: "__vt_" */
+-      else
+-	(*mangled) += 4; /* Old style, no thunks: "_vt<CPLUS_MARKER>" */
+-      while (**mangled != '\0')
+-	{
+-	  switch (**mangled)
+-	    {
+-	    case 'Q':
+-	    case 'K':
+-	      success = demangle_qualified (work, mangled, declp, 0, 1);
+-	      break;
+-	    case 't':
+-	      success = demangle_template (work, mangled, declp, 0, 1,
+-					   1);
+-	      break;
+-	    default:
+-	      if (ISDIGIT((unsigned char)*mangled[0]))
+-		{
+-		  n = consume_count(mangled);
+-		  /* We may be seeing a too-large size, or else a
+-		     ".<digits>" indicating a static local symbol.  In
+-		     any case, declare victory and move on; *don't* try
+-		     to use n to allocate.  */
+-		  if (n > (int) strlen (*mangled))
+-		    {
+-		      success = 1;
+-		      break;
+-		    }
+-		  else if (n == -1)
+-		    {
+-		      success = 0;
+-		      break;
+-		    }
+-		}
+-	      else
+-		{
+-		  n = strcspn (*mangled, cplus_markers);
+-		}
+-	      string_appendn (declp, *mangled, n);
+-	      (*mangled) += n;
+-	    }
+-
+-	  p = strpbrk (*mangled, cplus_markers);
+-	  if (success && ((p == NULL) || (p == *mangled)))
+-	    {
+-	      if (p != NULL)
+-		{
+-		  string_append (declp, SCOPE_STRING (work));
+-		  (*mangled)++;
+-		}
+-	    }
+-	  else
+-	    {
+-	      success = 0;
+-	      break;
+-	    }
+-	}
+-      if (success)
+-	string_append (declp, " virtual table");
+-    }
+-  else if ((*mangled)[0] == '_'
+-	   && (strchr("0123456789Qt", (*mangled)[1]) != NULL)
+-	   && (p = strpbrk (*mangled, cplus_markers)) != NULL)
+-    {
+-      /* static data member, "_3foo$varname" for example */
+-      (*mangled)++;
+-      switch (**mangled)
+-	{
+-	case 'Q':
+-	case 'K':
+-	  success = demangle_qualified (work, mangled, declp, 0, 1);
+-	  break;
+-	case 't':
+-	  success = demangle_template (work, mangled, declp, 0, 1, 1);
+-	  break;
+-	default:
+-	  n = consume_count (mangled);
+-	  if (n < 0 || n > (long) strlen (*mangled))
+-	    {
+-	      success = 0;
+-	      break;
+-	    }
+-
+-	  if (n > 10 && strncmp (*mangled, "_GLOBAL_", 8) == 0
+-	      && (*mangled)[9] == 'N'
+-	      && (*mangled)[8] == (*mangled)[10]
+-	      && strchr (cplus_markers, (*mangled)[8]))
+-	    {
+-	      /* A member of the anonymous namespace.  There's information
+-		 about what identifier or filename it was keyed to, but
+-		 it's just there to make the mangled name unique; we just
+-		 step over it.  */
+-	      string_append (declp, "{anonymous}");
+-	      (*mangled) += n;
+-
+-	      /* Now p points to the marker before the N, so we need to
+-		 update it to the first marker after what we consumed.  */
+-	      p = strpbrk (*mangled, cplus_markers);
+-	      break;
+-	    }
+-
+-	  string_appendn (declp, *mangled, n);
+-	  (*mangled) += n;
+-	}
+-      if (success && (p == *mangled))
+-	{
+-	  /* Consumed everything up to the cplus_marker, append the
+-	     variable name.  */
+-	  (*mangled)++;
+-	  string_append (declp, SCOPE_STRING (work));
+-	  n = strlen (*mangled);
+-	  string_appendn (declp, *mangled, n);
+-	  (*mangled) += n;
+-	}
+-      else
+-	{
+-	  success = 0;
+-	}
+-    }
+-  else if (strncmp (*mangled, "__thunk_", 8) == 0)
+-    {
+-      int delta;
+-
+-      (*mangled) += 8;
+-      delta = consume_count (mangled);
+-      if (delta == -1)
+-	success = 0;
+-      else if (**mangled != '_')
+-        success = 0;
+-      else
+-	{
+-	  char *method = internal_cplus_demangle (work, ++*mangled);
+-
+-	  if (method)
+-	    {
+-	      char buf[50];
+-	      sprintf (buf, "virtual function thunk (delta:%d) for ", -delta);
+-	      string_append (declp, buf);
+-	      string_append (declp, method);
+-	      free (method);
+-	      n = strlen (*mangled);
+-	      (*mangled) += n;
+-	    }
+-	  else
+-	    {
+-	      success = 0;
+-	    }
+-	}
+-    }
+-  else if (strncmp (*mangled, "__t", 3) == 0
+-	   && ((*mangled)[3] == 'i' || (*mangled)[3] == 'f'))
+-    {
+-      p = (*mangled)[3] == 'i' ? " type_info node" : " type_info function";
+-      (*mangled) += 4;
+-      switch (**mangled)
+-	{
+-	case 'Q':
+-	case 'K':
+-	  success = demangle_qualified (work, mangled, declp, 0, 1);
+-	  break;
+-	case 't':
+-	  success = demangle_template (work, mangled, declp, 0, 1, 1);
+-	  break;
+-	default:
+-	  success = do_type (work, mangled, declp);
+-	  break;
+-	}
+-      if (success && **mangled != '\0')
+-	success = 0;
+-      if (success)
+-	string_append (declp, p);
+-    }
+-  else
+-    {
+-      success = 0;
+-    }
+-  return (success);
+-}
+-
+-static void
+-recursively_demangle(struct work_stuff *work, const char **mangled,
+-                     string *result, int namelength)
+-{
+-  char * recurse = (char *)NULL;
+-  char * recurse_dem = (char *)NULL;
+-
+-  recurse = XNEWVEC (char, namelength + 1);
+-  memcpy (recurse, *mangled, namelength);
+-  recurse[namelength] = '\000';
+-
+-  recurse_dem = cplus_demangle (recurse, work->options);
+-
+-  if (recurse_dem)
+-    {
+-      string_append (result, recurse_dem);
+-      free (recurse_dem);
+-    }
+-  else
+-    {
+-      string_appendn (result, *mangled, namelength);
+-    }
+-  free (recurse);
+-  *mangled += namelength;
+-}
+-
+-/*
+-
+-LOCAL FUNCTION
+-
+-	arm_special -- special handling of ARM/lucid mangled strings
+-
+-SYNOPSIS
+-
+-	static int
+-	arm_special (const char **mangled,
+-		     string *declp);
+-
+-
+-DESCRIPTION
+-
+-	Process some special ARM style mangling forms that don't fit
+-	the normal pattern.  For example:
+-
+-		__vtbl__3foo		(foo virtual table)
+-		__vtbl__3foo__3bar	(bar::foo virtual table)
+-
+- */
+-
+-static int
+-arm_special (const char **mangled, string *declp)
+-{
+-  int n;
+-  int success = 1;
+-  const char *scan;
+-
+-  if (strncmp (*mangled, ARM_VTABLE_STRING, ARM_VTABLE_STRLEN) == 0)
+-    {
+-      /* Found a ARM style virtual table, get past ARM_VTABLE_STRING
+-         and create the decl.  Note that we consume the entire mangled
+-	 input string, which means that demangle_signature has no work
+-	 to do.  */
+-      scan = *mangled + ARM_VTABLE_STRLEN;
+-      while (*scan != '\0')        /* first check it can be demangled */
+-        {
+-          n = consume_count (&scan);
+-          if (n == -1)
+-	    {
+-	      return (0);           /* no good */
+-	    }
+-          scan += n;
+-          if (scan[0] == '_' && scan[1] == '_')
+-	    {
+-	      scan += 2;
+-	    }
+-        }
+-      (*mangled) += ARM_VTABLE_STRLEN;
+-      while (**mangled != '\0')
+-	{
+-	  n = consume_count (mangled);
+-          if (n == -1
+-	      || n > (long) strlen (*mangled))
+-	    return 0;
+-	  string_prependn (declp, *mangled, n);
+-	  (*mangled) += n;
+-	  if ((*mangled)[0] == '_' && (*mangled)[1] == '_')
+-	    {
+-	      string_prepend (declp, "::");
+-	      (*mangled) += 2;
+-	    }
+-	}
+-      string_append (declp, " virtual table");
+-    }
+-  else
+-    {
+-      success = 0;
+-    }
+-  return (success);
+-}
+-
+-/*
+-
+-LOCAL FUNCTION
+-
+-	demangle_qualified -- demangle 'Q' qualified name strings
+-
+-SYNOPSIS
+-
+-	static int
+-	demangle_qualified (struct work_stuff *, const char *mangled,
+-			    string *result, int isfuncname, int append);
+-
+-DESCRIPTION
+-
+-	Demangle a qualified name, such as "Q25Outer5Inner" which is
+-	the mangled form of "Outer::Inner".  The demangled output is
+-	prepended or appended to the result string according to the
+-	state of the append flag.
+-
+-	If isfuncname is nonzero, then the qualified name we are building
+-	is going to be used as a member function name, so if it is a
+-	constructor or destructor function, append an appropriate
+-	constructor or destructor name.  I.E. for the above example,
+-	the result for use as a constructor is "Outer::Inner::Inner"
+-	and the result for use as a destructor is "Outer::Inner::~Inner".
+-
+-BUGS
+-
+-	Numeric conversion is ASCII dependent (FIXME).
+-
+- */
+-
+-static int
+-demangle_qualified (struct work_stuff *work, const char **mangled,
+-                    string *result, int isfuncname, int append)
+-{
+-  int qualifiers = 0;
+-  int success = 1;
+-  char num[2];
+-  string temp;
+-  string last_name;
+-  int bindex = register_Btype (work);
+-
+-  /* We only make use of ISFUNCNAME if the entity is a constructor or
+-     destructor.  */
+-  isfuncname = (isfuncname
+-		&& ((work->constructor & 1) || (work->destructor & 1)));
+-
+-  string_init (&temp);
+-  string_init (&last_name);
+-
+-  if ((*mangled)[0] == 'K')
+-    {
+-    /* Squangling qualified name reuse */
+-      int idx;
+-      (*mangled)++;
+-      idx = consume_count_with_underscores (mangled);
+-      if (idx == -1 || idx >= work -> numk)
+-        success = 0;
+-      else
+-        string_append (&temp, work -> ktypevec[idx]);
+-    }
+-  else
+-    switch ((*mangled)[1])
+-    {
+-    case '_':
+-      /* GNU mangled name with more than 9 classes.  The count is preceded
+-	 by an underscore (to distinguish it from the <= 9 case) and followed
+-	 by an underscore.  */
+-      (*mangled)++;
+-      qualifiers = consume_count_with_underscores (mangled);
+-      if (qualifiers == -1)
+-	success = 0;
+-      break;
+-
+-    case '1':
+-    case '2':
+-    case '3':
+-    case '4':
+-    case '5':
+-    case '6':
+-    case '7':
+-    case '8':
+-    case '9':
+-      /* The count is in a single digit.  */
+-      num[0] = (*mangled)[1];
+-      num[1] = '\0';
+-      qualifiers = atoi (num);
+-
+-      /* If there is an underscore after the digit, skip it.  This is
+-	 said to be for ARM-qualified names, but the ARM makes no
+-	 mention of such an underscore.  Perhaps cfront uses one.  */
+-      if ((*mangled)[2] == '_')
+-	{
+-	  (*mangled)++;
+-	}
+-      (*mangled) += 2;
+-      break;
+-
+-    case '0':
+-    default:
+-      success = 0;
+-    }
+-
+-  if (!success)
+-    return success;
+-
+-  /* Pick off the names and collect them in the temp buffer in the order
+-     in which they are found, separated by '::'.  */
+-
+-  while (qualifiers-- > 0)
+-    {
+-      int remember_K = 1;
+-      string_clear (&last_name);
+-
+-      if (*mangled[0] == '_')
+-	(*mangled)++;
+-
+-      if (*mangled[0] == 't')
+-	{
+-	  /* Here we always append to TEMP since we will want to use
+-	     the template name without the template parameters as a
+-	     constructor or destructor name.  The appropriate
+-	     (parameter-less) value is returned by demangle_template
+-	     in LAST_NAME.  We do not remember the template type here,
+-	     in order to match the G++ mangling algorithm.  */
+-	  success = demangle_template(work, mangled, &temp,
+-				      &last_name, 1, 0);
+-	  if (!success)
+-	    break;
+-	}
+-      else if (*mangled[0] == 'K')
+-	{
+-          int idx;
+-          (*mangled)++;
+-          idx = consume_count_with_underscores (mangled);
+-          if (idx == -1 || idx >= work->numk)
+-            success = 0;
+-          else
+-            string_append (&temp, work->ktypevec[idx]);
+-          remember_K = 0;
+-
+-	  if (!success) break;
+-	}
+-      else
+-	{
+-	  if (EDG_DEMANGLING)
+-            {
+-	      int namelength;
+- 	      /* Now recursively demangle the qualifier
+- 	       * This is necessary to deal with templates in
+- 	       * mangling styles like EDG */
+-	      namelength = consume_count (mangled);
+-	      if (namelength == -1)
+-		{
+-		  success = 0;
+-		  break;
+-		}
+- 	      recursively_demangle(work, mangled, &temp, namelength);
+-            }
+-          else
+-            {
+-              string_delete (&last_name);
+-              success = do_type (work, mangled, &last_name);
+-              if (!success)
+-                break;
+-              string_appends (&temp, &last_name);
+-            }
+-	}
+-
+-      if (remember_K)
+-	remember_Ktype (work, temp.b, LEN_STRING (&temp));
+-
+-      if (qualifiers > 0)
+-	string_append (&temp, SCOPE_STRING (work));
+-    }
+-
+-  remember_Btype (work, temp.b, LEN_STRING (&temp), bindex);
+-
+-  /* If we are using the result as a function name, we need to append
+-     the appropriate '::' separated constructor or destructor name.
+-     We do this here because this is the most convenient place, where
+-     we already have a pointer to the name and the length of the name.  */
+-
+-  if (isfuncname)
+-    {
+-      string_append (&temp, SCOPE_STRING (work));
+-      if (work -> destructor & 1)
+-	string_append (&temp, "~");
+-      string_appends (&temp, &last_name);
+-    }
+-
+-  /* Now either prepend the temp buffer to the result, or append it,
+-     depending upon the state of the append flag.  */
+-
+-  if (append)
+-    string_appends (result, &temp);
+-  else
+-    {
+-      if (!STRING_EMPTY (result))
+-	string_append (&temp, SCOPE_STRING (work));
+-      string_prepends (result, &temp);
+-    }
+-
+-  string_delete (&last_name);
+-  string_delete (&temp);
+-  return (success);
+-}
+-
+-/*
+-
+-LOCAL FUNCTION
+-
+-	get_count -- convert an ascii count to integer, consuming tokens
+-
+-SYNOPSIS
+-
+-	static int
+-	get_count (const char **type, int *count)
+-
+-DESCRIPTION
+-
+-	Assume that *type points at a count in a mangled name; set
+-	*count to its value, and set *type to the next character after
+-	the count.  There are some weird rules in effect here.
+-
+-	If *type does not point at a string of digits, return zero.
+-
+-	If *type points at a string of digits followed by an
+-	underscore, set *count to their value as an integer, advance
+-	*type to point *after the underscore, and return 1.
+-
+-	If *type points at a string of digits not followed by an
+-	underscore, consume only the first digit.  Set *count to its
+-	value as an integer, leave *type pointing after that digit,
+-	and return 1.
+-
+-        The excuse for this odd behavior: in the ARM and HP demangling
+-        styles, a type can be followed by a repeat count of the form
+-        `Nxy', where:
+-
+-        `x' is a single digit specifying how many additional copies
+-            of the type to append to the argument list, and
+-
+-        `y' is one or more digits, specifying the zero-based index of
+-            the first repeated argument in the list.  Yes, as you're
+-            unmangling the name you can figure this out yourself, but
+-            it's there anyway.
+-
+-        So, for example, in `bar__3fooFPiN51', the first argument is a
+-        pointer to an integer (`Pi'), and then the next five arguments
+-        are the same (`N5'), and the first repeat is the function's
+-        second argument (`1').
+-*/
+-
+-static int
+-get_count (const char **type, int *count)
+-{
+-  const char *p;
+-  int n;
+-
+-  if (!ISDIGIT ((unsigned char)**type))
+-    return (0);
+-  else
+-    {
+-      *count = **type - '0';
+-      (*type)++;
+-      if (ISDIGIT ((unsigned char)**type))
+-	{
+-	  p = *type;
+-	  n = *count;
+-	  do
+-	    {
+-	      n *= 10;
+-	      n += *p - '0';
+-	      p++;
+-	    }
+-	  while (ISDIGIT ((unsigned char)*p));
+-	  if (*p == '_')
+-	    {
+-	      *type = p + 1;
+-	      *count = n;
+-	    }
+-	}
+-    }
+-  return (1);
+-}
+-
+-/* RESULT will be initialised here; it will be freed on failure.  The
+-   value returned is really a type_kind_t.  */
+-
+-static int
+-do_type (struct work_stuff *work, const char **mangled, string *result)
+-{
+-  int n;
+-  int i;
+-  int is_proctypevec;
+-  int done;
+-  int success;
+-  string decl;
+-  const char *remembered_type;
+-  int type_quals;
+-  type_kind_t tk = tk_none;
+-
+-  string_init (&decl);
+-  string_init (result);
+-
+-  done = 0;
+-  success = 1;
+-  is_proctypevec = 0;
+-  while (success && !done)
+-    {
+-      int member;
+-      switch (**mangled)
+-	{
+-
+-	  /* A pointer type */
+-	case 'P':
+-	case 'p':
+-	  (*mangled)++;
+-	  if (! (work -> options & DMGL_JAVA))
+-	    string_prepend (&decl, "*");
+-	  if (tk == tk_none)
+-	    tk = tk_pointer;
+-	  break;
+-
+-	  /* A reference type */
+-	case 'R':
+-	  (*mangled)++;
+-	  string_prepend (&decl, "&");
+-	  if (tk == tk_none)
+-	    tk = tk_reference;
+-	  break;
+-
+-	  /* An rvalue reference type */
+-	case 'O':
+-	  (*mangled)++;
+-	  string_prepend (&decl, "&&");
+-	  if (tk == tk_none)
+-	    tk = tk_rvalue_reference;
+-	  break;
+-
+-	  /* An array */
+-	case 'A':
+-	  {
+-	    ++(*mangled);
+-	    if (!STRING_EMPTY (&decl)
+-		&& (decl.b[0] == '*' || decl.b[0] == '&'))
+-	      {
+-		string_prepend (&decl, "(");
+-		string_append (&decl, ")");
+-	      }
+-	    string_append (&decl, "[");
+-	    if (**mangled != '_')
+-	      success = demangle_template_value_parm (work, mangled, &decl,
+-						      tk_integral);
+-	    if (**mangled == '_')
+-	      ++(*mangled);
+-	    string_append (&decl, "]");
+-	    break;
+-	  }
+-
+-	/* A back reference to a previously seen type */
+-	case 'T':
+-	  (*mangled)++;
+-	  if (!get_count (mangled, &n) || n < 0 || n >= work -> ntypes)
+-	    {
+-	      success = 0;
+-	    }
+-	  else
+-	    for (i = 0; i < work->nproctypes; i++)
+-	      if (work -> proctypevec [i] == n)
+-	        success = 0;
+-
+-	  if (success)
+-	    {    
+-	      is_proctypevec = 1;
+-	      push_processed_type (work, n);
+-	      remembered_type = work->typevec[n];
+-	      mangled = &remembered_type;
+-	    }
+-	  break;
+-
+-	  /* A function */
+-	case 'F':
+-	  (*mangled)++;
+-	    if (!STRING_EMPTY (&decl)
+-		&& (decl.b[0] == '*' || decl.b[0] == '&'))
+-	    {
+-	      string_prepend (&decl, "(");
+-	      string_append (&decl, ")");
+-	    }
+-	  /* After picking off the function args, we expect to either find the
+-	     function return type (preceded by an '_') or the end of the
+-	     string.  */
+-	  if (!demangle_nested_args (work, mangled, &decl)
+-	      || (**mangled != '_' && **mangled != '\0'))
+-	    {
+-	      success = 0;
+-	      break;
+-	    }
+-	  if (success && (**mangled == '_'))
+-	    (*mangled)++;
+-	  break;
+-
+-	case 'M':
+-	  {
+-	    type_quals = TYPE_UNQUALIFIED;
+-
+-	    member = **mangled == 'M';
+-	    (*mangled)++;
+-
+-	    string_append (&decl, ")");
+-
+-	    /* We don't need to prepend `::' for a qualified name;
+-	       demangle_qualified will do that for us.  */
+-	    if (**mangled != 'Q')
+-	      string_prepend (&decl, SCOPE_STRING (work));
+-
+-	    if (ISDIGIT ((unsigned char)**mangled))
+-	      {
+-		n = consume_count (mangled);
+-		if (n == -1
+-		    || (int) strlen (*mangled) < n)
+-		  {
+-		    success = 0;
+-		    break;
+-		  }
+-		string_prependn (&decl, *mangled, n);
+-		*mangled += n;
+-	      }
+-	    else if (**mangled == 'X' || **mangled == 'Y')
+-	      {
+-		string temp;
+-		do_type (work, mangled, &temp);
+-		string_prepends (&decl, &temp);
+-		string_delete (&temp);
+-	      }
+-	    else if (**mangled == 't')
+-	      {
+-		string temp;
+-		string_init (&temp);
+-		success = demangle_template (work, mangled, &temp,
+-					     NULL, 1, 1);
+-		if (success)
+-		  {
+-		    string_prependn (&decl, temp.b, temp.p - temp.b);
+-		    string_delete (&temp);
+-		  }
+-		else
+-		  {
+-		    string_delete (&temp);
+-		    break;
+-		  }
+-	      }
+-	    else if (**mangled == 'Q')
+-	      {
+-		success = demangle_qualified (work, mangled, &decl,
+-					      /*isfuncnam=*/0, 
+-					      /*append=*/0);
+-		if (!success)
+-		  break;
+-	      }
+-	    else
+-	      {
+-		success = 0;
+-		break;
+-	      }
+-
+-	    string_prepend (&decl, "(");
+-	    if (member)
+-	      {
+-		switch (**mangled)
+-		  {
+-		  case 'C':
+-		  case 'V':
+-		  case 'u':
+-		    type_quals |= code_for_qualifier (**mangled);
+-		    (*mangled)++;
+-		    break;
+-
+-		  default:
+-		    break;
+-		  }
+-
+-		if (*(*mangled) != 'F')
+-		  {
+-		    success = 0;
+-		    break;
+-		  }
+-		(*mangled)++;
+-	      }
+-	    if ((member && !demangle_nested_args (work, mangled, &decl))
+-		|| **mangled != '_')
+-	      {
+-		success = 0;
+-		break;
+-	      }
+-	    (*mangled)++;
+-	    if (! PRINT_ANSI_QUALIFIERS)
+-	      {
+-		break;
+-	      }
+-	    if (type_quals != TYPE_UNQUALIFIED)
+-	      {
+-		APPEND_BLANK (&decl);
+-		string_append (&decl, qualifier_string (type_quals));
+-	      }
+-	    break;
+-	  }
+-        case 'G':
+-	  (*mangled)++;
+-	  break;
+-
+-	case 'C':
+-	case 'V':
+-	case 'u':
+-	  if (PRINT_ANSI_QUALIFIERS)
+-	    {
+-	      if (!STRING_EMPTY (&decl))
+-		string_prepend (&decl, " ");
+-
+-	      string_prepend (&decl, demangle_qualifier (**mangled));
+-	    }
+-	  (*mangled)++;
+-	  break;
+-	  /*
+-	    }
+-	    */
+-
+-	  /* fall through */
+-	default:
+-	  done = 1;
+-	  break;
+-	}
+-    }
+-
+-  if (success) switch (**mangled)
+-    {
+-      /* A qualified name, such as "Outer::Inner".  */
+-    case 'Q':
+-    case 'K':
+-      {
+-        success = demangle_qualified (work, mangled, result, 0, 1);
+-        break;
+-      }
+-
+-    /* A back reference to a previously seen squangled type */
+-    case 'B':
+-      (*mangled)++;
+-      if (!get_count (mangled, &n) || n < 0 || n >= work -> numb)
+-	success = 0;
+-      else
+-	string_append (result, work->btypevec[n]);
+-      break;
+-
+-    case 'X':
+-    case 'Y':
+-      /* A template parm.  We substitute the corresponding argument. */
+-      {
+-	int idx;
+-
+-	(*mangled)++;
+-	idx = consume_count_with_underscores (mangled);
+-
+-	if (idx == -1
+-	    || (work->tmpl_argvec && idx >= work->ntmpl_args)
+-	    || consume_count_with_underscores (mangled) == -1)
+-	  {
+-	    success = 0;
+-	    break;
+-	  }
+-
+-	if (work->tmpl_argvec)
+-	  string_append (result, work->tmpl_argvec[idx]);
+-	else
+-	  string_append_template_idx (result, idx);
+-
+-	success = 1;
+-      }
+-    break;
+-
+-    default:
+-      success = demangle_fund_type (work, mangled, result);
+-      if (tk == tk_none)
+-	tk = (type_kind_t) success;
+-      break;
+-    }
+-
+-  if (success)
+-    {
+-      if (!STRING_EMPTY (&decl))
+-	{
+-	  string_append (result, " ");
+-	  string_appends (result, &decl);
+-	}
+-    }
+-  else
+-    string_delete (result);
+-  string_delete (&decl);
+-
+-  if (is_proctypevec)
+-    pop_processed_type (work); 
+-
+-  if (success)
+-    /* Assume an integral type, if we're not sure.  */
+-    return (int) ((tk == tk_none) ? tk_integral : tk);
+-  else
+-    return 0;
+-}
+-
+-/* Given a pointer to a type string that represents a fundamental type
+-   argument (int, long, unsigned int, etc) in TYPE, a pointer to the
+-   string in which the demangled output is being built in RESULT, and
+-   the WORK structure, decode the types and add them to the result.
+-
+-   For example:
+-
+-   	"Ci"	=>	"const int"
+-	"Sl"	=>	"signed long"
+-	"CUs"	=>	"const unsigned short"
+-
+-   The value returned is really a type_kind_t.  */
+-
+-static int
+-demangle_fund_type (struct work_stuff *work,
+-                    const char **mangled, string *result)
+-{
+-  int done = 0;
+-  int success = 1;
+-  char buf[INTBUF_SIZE + 5 /* 'int%u_t' */];
+-  unsigned int dec = 0;
+-  type_kind_t tk = tk_integral;
+-
+-  /* First pick off any type qualifiers.  There can be more than one.  */
+-
+-  while (!done)
+-    {
+-      switch (**mangled)
+-	{
+-	case 'C':
+-	case 'V':
+-	case 'u':
+-	  if (PRINT_ANSI_QUALIFIERS)
+-	    {
+-              if (!STRING_EMPTY (result))
+-                string_prepend (result, " ");
+-	      string_prepend (result, demangle_qualifier (**mangled));
+-	    }
+-	  (*mangled)++;
+-	  break;
+-	case 'U':
+-	  (*mangled)++;
+-	  APPEND_BLANK (result);
+-	  string_append (result, "unsigned");
+-	  break;
+-	case 'S': /* signed char only */
+-	  (*mangled)++;
+-	  APPEND_BLANK (result);
+-	  string_append (result, "signed");
+-	  break;
+-	case 'J':
+-	  (*mangled)++;
+-	  APPEND_BLANK (result);
+-	  string_append (result, "__complex");
+-	  break;
+-	default:
+-	  done = 1;
+-	  break;
+-	}
+-    }
+-
+-  /* Now pick off the fundamental type.  There can be only one.  */
+-
+-  switch (**mangled)
+-    {
+-    case '\0':
+-    case '_':
+-      break;
+-    case 'v':
+-      (*mangled)++;
+-      APPEND_BLANK (result);
+-      string_append (result, "void");
+-      break;
+-    case 'x':
+-      (*mangled)++;
+-      APPEND_BLANK (result);
+-      string_append (result, "long long");
+-      break;
+-    case 'l':
+-      (*mangled)++;
+-      APPEND_BLANK (result);
+-      string_append (result, "long");
+-      break;
+-    case 'i':
+-      (*mangled)++;
+-      APPEND_BLANK (result);
+-      string_append (result, "int");
+-      break;
+-    case 's':
+-      (*mangled)++;
+-      APPEND_BLANK (result);
+-      string_append (result, "short");
+-      break;
+-    case 'b':
+-      (*mangled)++;
+-      APPEND_BLANK (result);
+-      string_append (result, "bool");
+-      tk = tk_bool;
+-      break;
+-    case 'c':
+-      (*mangled)++;
+-      APPEND_BLANK (result);
+-      string_append (result, "char");
+-      tk = tk_char;
+-      break;
+-    case 'w':
+-      (*mangled)++;
+-      APPEND_BLANK (result);
+-      string_append (result, "wchar_t");
+-      tk = tk_char;
+-      break;
+-    case 'r':
+-      (*mangled)++;
+-      APPEND_BLANK (result);
+-      string_append (result, "long double");
+-      tk = tk_real;
+-      break;
+-    case 'd':
+-      (*mangled)++;
+-      APPEND_BLANK (result);
+-      string_append (result, "double");
+-      tk = tk_real;
+-      break;
+-    case 'f':
+-      (*mangled)++;
+-      APPEND_BLANK (result);
+-      string_append (result, "float");
+-      tk = tk_real;
+-      break;
+-    case 'G':
+-      (*mangled)++;
+-      if (!ISDIGIT ((unsigned char)**mangled))
+-	{
+-	  success = 0;
+-	  break;
+-	}
+-      /* fall through */
+-    case 'I':
+-      (*mangled)++;
+-      if (**mangled == '_')
+-	{
+-	  int i;
+-	  (*mangled)++;
+-	  for (i = 0;
+-	       i < (long) sizeof (buf) - 1 && **mangled && **mangled != '_';
+-	       (*mangled)++, i++)
+-	    buf[i] = **mangled;
+-	  if (**mangled != '_')
+-	    {
+-	      success = 0;
+-	      break;
+-	    }
+-	  buf[i] = '\0';
+-	  (*mangled)++;
+-	}
+-      else
+-	{
+-	  strncpy (buf, *mangled, 2);
+-	  buf[2] = '\0';
+-	  *mangled += min (strlen (*mangled), 2);
+-	}
+-      sscanf (buf, "%x", &dec);
+-      sprintf (buf, "int%u_t", dec);
+-      APPEND_BLANK (result);
+-      string_append (result, buf);
+-      break;
+-
+-      /* fall through */
+-      /* An explicit type, such as "6mytype" or "7integer" */
+-    case '0':
+-    case '1':
+-    case '2':
+-    case '3':
+-    case '4':
+-    case '5':
+-    case '6':
+-    case '7':
+-    case '8':
+-    case '9':
+-      {
+-        int bindex = register_Btype (work);
+-        string btype;
+-        string_init (&btype);
+-        if (demangle_class_name (work, mangled, &btype)) {
+-          remember_Btype (work, btype.b, LEN_STRING (&btype), bindex);
+-          APPEND_BLANK (result);
+-          string_appends (result, &btype);
+-        }
+-        else
+-          success = 0;
+-        string_delete (&btype);
+-        break;
+-      }
+-    case 't':
+-      {
+-        string btype;
+-        string_init (&btype);
+-        success = demangle_template (work, mangled, &btype, 0, 1, 1);
+-        string_appends (result, &btype);
+-        string_delete (&btype);
+-        break;
+-      }
+-    default:
+-      success = 0;
+-      break;
+-    }
+-
+-  return success ? ((int) tk) : 0;
+-}
+-
+-
+-/* Handle a template's value parameter for HP aCC (extension from ARM)
+-   **mangled points to 'S' or 'U' */
+-
+-static int
+-do_hpacc_template_const_value (struct work_stuff *work ATTRIBUTE_UNUSED,
+-                               const char **mangled, string *result)
+-{
+-  int unsigned_const;
+-
+-  if (**mangled != 'U' && **mangled != 'S')
+-    return 0;
+-
+-  unsigned_const = (**mangled == 'U');
+-
+-  (*mangled)++;
+-
+-  switch (**mangled)
+-    {
+-      case 'N':
+-        string_append (result, "-");
+-        /* fall through */
+-      case 'P':
+-        (*mangled)++;
+-        break;
+-      case 'M':
+-        /* special case for -2^31 */
+-        string_append (result, "-2147483648");
+-        (*mangled)++;
+-        return 1;
+-      default:
+-        return 0;
+-    }
+-
+-  /* We have to be looking at an integer now */
+-  if (!(ISDIGIT ((unsigned char)**mangled)))
+-    return 0;
+-
+-  /* We only deal with integral values for template
+-     parameters -- so it's OK to look only for digits */
+-  while (ISDIGIT ((unsigned char)**mangled))
+-    {
+-      char_str[0] = **mangled;
+-      string_append (result, char_str);
+-      (*mangled)++;
+-    }
+-
+-  if (unsigned_const)
+-    string_append (result, "U");
+-
+-  /* FIXME? Some day we may have 64-bit (or larger :-) ) constants
+-     with L or LL suffixes. pai/1997-09-03 */
+-
+-  return 1; /* success */
+-}
+-
+-/* Handle a template's literal parameter for HP aCC (extension from ARM)
+-   **mangled is pointing to the 'A' */
+-
+-static int
+-do_hpacc_template_literal (struct work_stuff *work, const char **mangled,
+-                           string *result)
+-{
+-  int literal_len = 0;
+-  char * recurse;
+-  char * recurse_dem;
+-
+-  if (**mangled != 'A')
+-    return 0;
+-
+-  (*mangled)++;
+-
+-  literal_len = consume_count (mangled);
+-
+-  if (literal_len <= 0
+-      || literal_len > (long) strlen (*mangled))
+-    return 0;
+-
+-  /* Literal parameters are names of arrays, functions, etc.  and the
+-     canonical representation uses the address operator */
+-  string_append (result, "&");
+-
+-  /* Now recursively demangle the literal name */
+-  recurse = XNEWVEC (char, literal_len + 1);
+-  memcpy (recurse, *mangled, literal_len);
+-  recurse[literal_len] = '\000';
+-
+-  recurse_dem = cplus_demangle (recurse, work->options);
+-
+-  if (recurse_dem)
+-    {
+-      string_append (result, recurse_dem);
+-      free (recurse_dem);
+-    }
+-  else
+-    {
+-      string_appendn (result, *mangled, literal_len);
+-    }
+-  (*mangled) += literal_len;
+-  free (recurse);
+-
+-  return 1;
+-}
+-
+-static int
+-snarf_numeric_literal (const char **args, string *arg)
+-{
+-  if (**args == '-')
+-    {
+-      char_str[0] = '-';
+-      string_append (arg, char_str);
+-      (*args)++;
+-    }
+-  else if (**args == '+')
+-    (*args)++;
+-
+-  if (!ISDIGIT ((unsigned char)**args))
+-    return 0;
+-
+-  while (ISDIGIT ((unsigned char)**args))
+-    {
+-      char_str[0] = **args;
+-      string_append (arg, char_str);
+-      (*args)++;
+-    }
+-
+-  return 1;
+-}
+-
+-/* Demangle the next argument, given by MANGLED into RESULT, which
+-   *should be an uninitialized* string.  It will be initialized here,
+-   and free'd should anything go wrong.  */
+-
+-static int
+-do_arg (struct work_stuff *work, const char **mangled, string *result)
+-{
+-  /* Remember where we started so that we can record the type, for
+-     non-squangling type remembering.  */
+-  const char *start = *mangled;
+-
+-  string_init (result);
+-
+-  if (work->nrepeats > 0)
+-    {
+-      --work->nrepeats;
+-
+-      if (work->previous_argument == 0)
+-	return 0;
+-
+-      /* We want to reissue the previous type in this argument list.  */
+-      string_appends (result, work->previous_argument);
+-      return 1;
+-    }
+-
+-  if (**mangled == 'n')
+-    {
+-      /* A squangling-style repeat.  */
+-      (*mangled)++;
+-      work->nrepeats = consume_count(mangled);
+-
+-      if (work->nrepeats <= 0)
+-	/* This was not a repeat count after all.  */
+-	return 0;
+-
+-      if (work->nrepeats > 9)
+-	{
+-	  if (**mangled != '_')
+-	    /* The repeat count should be followed by an '_' in this
+-	       case.  */
+-	    return 0;
+-	  else
+-	    (*mangled)++;
+-	}
+-
+-      /* Now, the repeat is all set up.  */
+-      return do_arg (work, mangled, result);
+-    }
+-
+-  /* Save the result in WORK->previous_argument so that we can find it
+-     if it's repeated.  Note that saving START is not good enough: we
+-     do not want to add additional types to the back-referenceable
+-     type vector when processing a repeated type.  */
+-  if (work->previous_argument)
+-    string_delete (work->previous_argument);
+-  else
+-    work->previous_argument = XNEW (string);
+-
+-  if (!do_type (work, mangled, work->previous_argument))
+-    return 0;
+-
+-  string_appends (result, work->previous_argument);
+-
+-  remember_type (work, start, *mangled - start);
+-  return 1;
+-}
+-
+-static void
+-push_processed_type (struct work_stuff *work, int typevec_index)
+-{
+-  if (work->nproctypes >= work->proctypevec_size)
+-    {
+-      if (!work->proctypevec_size)
+-	{
+-	  work->proctypevec_size = 4;
+-	  work->proctypevec = XNEWVEC (int, work->proctypevec_size);
+-	}
+-      else 
+-	{
+-	  if (work->proctypevec_size < 16)
+-	    /* Double when small.  */
+-	    work->proctypevec_size *= 2;
+-	  else
+-	    {
+-	      /* Grow slower when large.  */
+-	      if (work->proctypevec_size > (INT_MAX / 3) * 2)
+-                xmalloc_failed (INT_MAX);
+-              work->proctypevec_size = (work->proctypevec_size * 3 / 2);
+-	    }   
+-          work->proctypevec
+-            = XRESIZEVEC (int, work->proctypevec, work->proctypevec_size);
+-	}
+-    }
+-    work->proctypevec [work->nproctypes++] = typevec_index;
+-}
+-
+-static void
+-pop_processed_type (struct work_stuff *work)
+-{
+-  work->nproctypes--;
+-}
+-
+-static void
+-remember_type (struct work_stuff *work, const char *start, int len)
+-{
+-  char *tem;
+-
+-  if (work->forgetting_types)
+-    return;
+-
+-  if (work -> ntypes >= work -> typevec_size)
+-    {
+-      if (work -> typevec_size == 0)
+-	{
+-	  work -> typevec_size = 3;
+-	  work -> typevec = XNEWVEC (char *, work->typevec_size);
+-	}
+-      else
+-	{
+-          if (work -> typevec_size > INT_MAX / 2)
+-	    xmalloc_failed (INT_MAX);
+-	  work -> typevec_size *= 2;
+-	  work -> typevec
+-	    = XRESIZEVEC (char *, work->typevec, work->typevec_size);
+-	}
+-    }
+-  tem = XNEWVEC (char, len + 1);
+-  memcpy (tem, start, len);
+-  tem[len] = '\0';
+-  work -> typevec[work -> ntypes++] = tem;
+-}
+-
+-
+-/* Remember a K type class qualifier. */
+-static void
+-remember_Ktype (struct work_stuff *work, const char *start, int len)
+-{
+-  char *tem;
+-
+-  if (work -> numk >= work -> ksize)
+-    {
+-      if (work -> ksize == 0)
+-	{
+-	  work -> ksize = 5;
+-	  work -> ktypevec = XNEWVEC (char *, work->ksize);
+-	}
+-      else
+-	{
+-          if (work -> ksize > INT_MAX / 2)
+-	    xmalloc_failed (INT_MAX);
+-	  work -> ksize *= 2;
+-	  work -> ktypevec
+-	    = XRESIZEVEC (char *, work->ktypevec, work->ksize);
+-	}
+-    }
+-  tem = XNEWVEC (char, len + 1);
+-  memcpy (tem, start, len);
+-  tem[len] = '\0';
+-  work -> ktypevec[work -> numk++] = tem;
+-}
+-
+-/* Register a B code, and get an index for it. B codes are registered
+-   as they are seen, rather than as they are completed, so map<temp<char> >
+-   registers map<temp<char> > as B0, and temp<char> as B1 */
+-
+-static int
+-register_Btype (struct work_stuff *work)
+-{
+-  int ret;
+-
+-  if (work -> numb >= work -> bsize)
+-    {
+-      if (work -> bsize == 0)
+-	{
+-	  work -> bsize = 5;
+-	  work -> btypevec = XNEWVEC (char *, work->bsize);
+-	}
+-      else
+-	{
+-          if (work -> bsize > INT_MAX / 2)
+-	    xmalloc_failed (INT_MAX);
+-	  work -> bsize *= 2;
+-	  work -> btypevec
+-	    = XRESIZEVEC (char *, work->btypevec, work->bsize);
+-	}
+-    }
+-  ret = work -> numb++;
+-  work -> btypevec[ret] = NULL;
+-  return(ret);
+-}
+-
+-/* Store a value into a previously registered B code type. */
+-
+-static void
+-remember_Btype (struct work_stuff *work, const char *start,
+-                int len, int index)
+-{
+-  char *tem;
+-
+-  tem = XNEWVEC (char, len + 1);
+-  memcpy (tem, start, len);
+-  tem[len] = '\0';
+-  work -> btypevec[index] = tem;
+-}
+-
+-/* Lose all the info related to B and K type codes. */
+-static void
+-forget_B_and_K_types (struct work_stuff *work)
+-{
+-  int i;
+-
+-  while (work -> numk > 0)
+-    {
+-      i = --(work -> numk);
+-      if (work -> ktypevec[i] != NULL)
+-	{
+-	  free (work -> ktypevec[i]);
+-	  work -> ktypevec[i] = NULL;
+-	}
+-    }
+-
+-  while (work -> numb > 0)
+-    {
+-      i = --(work -> numb);
+-      if (work -> btypevec[i] != NULL)
+-	{
+-	  free (work -> btypevec[i]);
+-	  work -> btypevec[i] = NULL;
+-	}
+-    }
+-}
+-/* Forget the remembered types, but not the type vector itself.  */
+-
+-static void
+-forget_types (struct work_stuff *work)
+-{
+-  int i;
+-
+-  while (work -> ntypes > 0)
+-    {
+-      i = --(work -> ntypes);
+-      if (work -> typevec[i] != NULL)
+-	{
+-	  free (work -> typevec[i]);
+-	  work -> typevec[i] = NULL;
+-	}
+-    }
+-}
+-
+-/* Process the argument list part of the signature, after any class spec
+-   has been consumed, as well as the first 'F' character (if any).  For
+-   example:
+-
+-   "__als__3fooRT0"		=>	process "RT0"
+-   "complexfunc5__FPFPc_PFl_i"	=>	process "PFPc_PFl_i"
+-
+-   DECLP must be already initialised, usually non-empty.  It won't be freed
+-   on failure.
+-
+-   Note that g++ differs significantly from ARM and lucid style mangling
+-   with regards to references to previously seen types.  For example, given
+-   the source fragment:
+-
+-     class foo {
+-       public:
+-       foo::foo (int, foo &ia, int, foo &ib, int, foo &ic);
+-     };
+-
+-     foo::foo (int, foo &ia, int, foo &ib, int, foo &ic) { ia = ib = ic; }
+-     void foo (int, foo &ia, int, foo &ib, int, foo &ic) { ia = ib = ic; }
+-
+-   g++ produces the names:
+-
+-     __3fooiRT0iT2iT2
+-     foo__FiR3fooiT1iT1
+-
+-   while lcc (and presumably other ARM style compilers as well) produces:
+-
+-     foo__FiR3fooT1T2T1T2
+-     __ct__3fooFiR3fooT1T2T1T2
+-
+-   Note that g++ bases its type numbers starting at zero and counts all
+-   previously seen types, while lucid/ARM bases its type numbers starting
+-   at one and only considers types after it has seen the 'F' character
+-   indicating the start of the function args.  For lucid/ARM style, we
+-   account for this difference by discarding any previously seen types when
+-   we see the 'F' character, and subtracting one from the type number
+-   reference.
+-
+- */
+-
+-static int
+-demangle_args (struct work_stuff *work, const char **mangled,
+-               string *declp)
+-{
+-  string arg;
+-  int need_comma = 0;
+-  int r;
+-  int t;
+-  const char *tem;
+-  char temptype;
+-
+-  if (PRINT_ARG_TYPES)
+-    {
+-      string_append (declp, "(");
+-      if (**mangled == '\0')
+-	{
+-	  string_append (declp, "void");
+-	}
+-    }
+-
+-  while ((**mangled != '_' && **mangled != '\0' && **mangled != 'e')
+-	 || work->nrepeats > 0)
+-    {
+-      if ((**mangled == 'N') || (**mangled == 'T'))
+-	{
+-	  temptype = *(*mangled)++;
+-
+-	  if (temptype == 'N')
+-	    {
+-	      if (!get_count (mangled, &r))
+-		{
+-		  return (0);
+-		}
+-	    }
+-	  else
+-	    {
+-	      r = 1;
+-	    }
+-          if ((HP_DEMANGLING || ARM_DEMANGLING || EDG_DEMANGLING) && work -> ntypes >= 10)
+-            {
+-              /* If we have 10 or more types we might have more than a 1 digit
+-                 index so we'll have to consume the whole count here. This
+-                 will lose if the next thing is a type name preceded by a
+-                 count but it's impossible to demangle that case properly
+-                 anyway. Eg if we already have 12 types is T12Pc "(..., type1,
+-                 Pc, ...)"  or "(..., type12, char *, ...)" */
+-              if ((t = consume_count(mangled)) <= 0)
+-                {
+-                  return (0);
+-                }
+-            }
+-          else
+-	    {
+-	      if (!get_count (mangled, &t))
+-	    	{
+-	          return (0);
+-	    	}
+-	    }
+-	  if (LUCID_DEMANGLING || ARM_DEMANGLING || HP_DEMANGLING || EDG_DEMANGLING)
+-	    {
+-	      t--;
+-	    }
+-	  /* Validate the type index.  Protect against illegal indices from
+-	     malformed type strings.  */
+-	  if ((t < 0) || (t >= work -> ntypes))
+-	    {
+-	      return (0);
+-	    }
+-	  while (work->nrepeats > 0 || --r >= 0)
+-	    {
+-	      tem = work -> typevec[t];
+-	      if (need_comma && PRINT_ARG_TYPES)
+-		{
+-		  string_append (declp, ", ");
+-		}
+-	      push_processed_type (work, t);  
+-	      if (!do_arg (work, &tem, &arg))
+-		{
+-		  pop_processed_type (work);
+-		  return (0);
+-		}
+-	      pop_processed_type (work);
+-	      if (PRINT_ARG_TYPES)
+-		{
+-		  string_appends (declp, &arg);
+-		}
+-	      string_delete (&arg);
+-	      need_comma = 1;
+-	    }
+-	}
+-      else
+-	{
+-	  if (need_comma && PRINT_ARG_TYPES)
+-	    string_append (declp, ", ");
+-	  if (!do_arg (work, mangled, &arg))
+-	    return (0);
+-	  if (PRINT_ARG_TYPES)
+-	    string_appends (declp, &arg);
+-	  string_delete (&arg);
+-	  need_comma = 1;
+-	}
+-    }
+-
+-  if (**mangled == 'e')
+-    {
+-      (*mangled)++;
+-      if (PRINT_ARG_TYPES)
+-	{
+-	  if (need_comma)
+-	    {
+-	      string_append (declp, ",");
+-	    }
+-	  string_append (declp, "...");
+-	}
+-    }
+-
+-  if (PRINT_ARG_TYPES)
+-    {
+-      string_append (declp, ")");
+-    }
+-  return (1);
+-}
+-
+-/* Like demangle_args, but for demangling the argument lists of function
+-   and method pointers or references, not top-level declarations.  */
+-
+-static int
+-demangle_nested_args (struct work_stuff *work, const char **mangled,
+-                      string *declp)
+-{
+-  string* saved_previous_argument;
+-  int result;
+-  int saved_nrepeats;
+-
+-  /* The G++ name-mangling algorithm does not remember types on nested
+-     argument lists, unless -fsquangling is used, and in that case the
+-     type vector updated by remember_type is not used.  So, we turn
+-     off remembering of types here.  */
+-  ++work->forgetting_types;
+-
+-  /* For the repeat codes used with -fsquangling, we must keep track of
+-     the last argument.  */
+-  saved_previous_argument = work->previous_argument;
+-  saved_nrepeats = work->nrepeats;
+-  work->previous_argument = 0;
+-  work->nrepeats = 0;
+-
+-  /* Actually demangle the arguments.  */
+-  result = demangle_args (work, mangled, declp);
+-
+-  /* Restore the previous_argument field.  */
+-  if (work->previous_argument)
+-    {
+-      string_delete (work->previous_argument);
+-      free ((char *) work->previous_argument);
+-    }
+-  work->previous_argument = saved_previous_argument;
+-  --work->forgetting_types;
+-  work->nrepeats = saved_nrepeats;
+-
+-  return result;
+-}
+-
+-/* Returns 1 if a valid function name was found or 0 otherwise.  */
+-
+-static int 
+-demangle_function_name (struct work_stuff *work, const char **mangled,
+-                        string *declp, const char *scan)
+-{
+-  size_t i;
+-  string type;
+-  const char *tem;
+-
+-  string_appendn (declp, (*mangled), scan - (*mangled));
+-  string_need (declp, 1);
+-  *(declp -> p) = '\0';
+-
+-  /* Consume the function name, including the "__" separating the name
+-     from the signature.  We are guaranteed that SCAN points to the
+-     separator.  */
+-
+-  (*mangled) = scan + 2;
+-  /* We may be looking at an instantiation of a template function:
+-     foo__Xt1t2_Ft3t4, where t1, t2, ... are template arguments and a
+-     following _F marks the start of the function arguments.  Handle
+-     the template arguments first. */
+-
+-  if (HP_DEMANGLING && (**mangled == 'X'))
+-    {
+-      demangle_arm_hp_template (work, mangled, 0, declp);
+-      /* This leaves MANGLED pointing to the 'F' marking func args */
+-    }
+-
+-  if (LUCID_DEMANGLING || ARM_DEMANGLING || HP_DEMANGLING || EDG_DEMANGLING)
+-    {
+-
+-      /* See if we have an ARM style constructor or destructor operator.
+-	 If so, then just record it, clear the decl, and return.
+-	 We can't build the actual constructor/destructor decl until later,
+-	 when we recover the class name from the signature.  */
+-
+-      if (strcmp (declp -> b, "__ct") == 0)
+-	{
+-	  work -> constructor += 1;
+-	  string_clear (declp);
+-	  return 1;
+-	}
+-      else if (strcmp (declp -> b, "__dt") == 0)
+-	{
+-	  work -> destructor += 1;
+-	  string_clear (declp);
+-	  return 1;
+-	}
+-    }
+-
+-  if (declp->p - declp->b >= 3
+-      && declp->b[0] == 'o'
+-      && declp->b[1] == 'p'
+-      && strchr (cplus_markers, declp->b[2]) != NULL)
+-    {
+-      /* see if it's an assignment expression */
+-      if (declp->p - declp->b >= 10 /* op$assign_ */
+-	  && memcmp (declp->b + 3, "assign_", 7) == 0)
+-	{
+-	  for (i = 0; i < ARRAY_SIZE (optable); i++)
+-	    {
+-	      int len = declp->p - declp->b - 10;
+-	      if ((int) strlen (optable[i].in) == len
+-		  && memcmp (optable[i].in, declp->b + 10, len) == 0)
+-		{
+-		  string_clear (declp);
+-		  string_append (declp, "operator");
+-		  string_append (declp, optable[i].out);
+-		  string_append (declp, "=");
+-		  break;
+-		}
+-	    }
+-	}
+-      else
+-	{
+-	  for (i = 0; i < ARRAY_SIZE (optable); i++)
+-	    {
+-	      int len = declp->p - declp->b - 3;
+-	      if ((int) strlen (optable[i].in) == len
+-		  && memcmp (optable[i].in, declp->b + 3, len) == 0)
+-		{
+-		  string_clear (declp);
+-		  string_append (declp, "operator");
+-		  string_append (declp, optable[i].out);
+-		  break;
+-		}
+-	    }
+-	}
+-    }
+-  else if (declp->p - declp->b >= 5 && memcmp (declp->b, "type", 4) == 0
+-	   && strchr (cplus_markers, declp->b[4]) != NULL)
+-    {
+-      /* type conversion operator */
+-      tem = declp->b + 5;
+-      if (do_type (work, &tem, &type))
+-	{
+-	  string_clear (declp);
+-	  string_append (declp, "operator ");
+-	  string_appends (declp, &type);
+-	  string_delete (&type);
+-	}
+-    }
+-  else if (declp->b[0] == '_' && declp->b[1] == '_'
+-	   && declp->b[2] == 'o' && declp->b[3] == 'p')
+-    {
+-      /* ANSI.  */
+-      /* type conversion operator.  */
+-      tem = declp->b + 4;
+-      if (do_type (work, &tem, &type))
+-	{
+-	  string_clear (declp);
+-	  string_append (declp, "operator ");
+-	  string_appends (declp, &type);
+-	  string_delete (&type);
+-	}
+-    }
+-  else if (declp->b[0] == '_' && declp->b[1] == '_'
+-	   && ISLOWER((unsigned char)declp->b[2])
+-	   && ISLOWER((unsigned char)declp->b[3]))
+-    {
+-      if (declp->b[4] == '\0')
+-	{
+-	  /* Operator.  */
+-	  for (i = 0; i < ARRAY_SIZE (optable); i++)
+-	    {
+-	      if (strlen (optable[i].in) == 2
+-		  && memcmp (optable[i].in, declp->b + 2, 2) == 0)
+-		{
+-		  string_clear (declp);
+-		  string_append (declp, "operator");
+-		  string_append (declp, optable[i].out);
+-		  break;
+-		}
+-	    }
+-	}
+-      else
+-	{
+-	  if (declp->b[2] == 'a' && declp->b[5] == '\0')
+-	    {
+-	      /* Assignment.  */
+-	      for (i = 0; i < ARRAY_SIZE (optable); i++)
+-		{
+-		  if (strlen (optable[i].in) == 3
+-		      && memcmp (optable[i].in, declp->b + 2, 3) == 0)
+-		    {
+-		      string_clear (declp);
+-		      string_append (declp, "operator");
+-		      string_append (declp, optable[i].out);
+-		      break;
+-		    }
+-		}
+-	    }
+-	}
+-    }
+-
+-  /* If a function name was obtained but it's not valid, we were not
+-     successful.  */
+-  if (LEN_STRING (declp) == 1 && declp->b[0] == '.')
+-    return 0;
+-  else
+-    return 1;
+-}
+-
+-/* a mini string-handling package */
+-
+-static void
+-string_need (string *s, int n)
+-{
+-  int tem;
+-
+-  if (s->b == NULL)
+-    {
+-      if (n < 32)
+-	{
+-	  n = 32;
+-	}
+-      s->p = s->b = XNEWVEC (char, n);
+-      s->e = s->b + n;
+-    }
+-  else if (s->e - s->p < n)
+-    {
+-      tem = s->p - s->b;
+-      if (n > INT_MAX / 2 - tem)
+-        xmalloc_failed (INT_MAX); 
+-      n += tem;
+-      n *= 2;
+-      s->b = XRESIZEVEC (char, s->b, n);
+-      s->p = s->b + tem;
+-      s->e = s->b + n;
+-    }
+-}
+-
+-static void
+-string_delete (string *s)
+-{
+-  if (s->b != NULL)
+-    {
+-      free (s->b);
+-      s->b = s->e = s->p = NULL;
+-    }
+-}
+-
+-static void
+-string_init (string *s)
+-{
+-  s->b = s->p = s->e = NULL;
+-}
+-
+-static void
+-string_clear (string *s)
+-{
+-  s->p = s->b;
+-}
+-
+-#if 0
+-
+-static int
+-string_empty (string *s)
+-{
+-  return (s->b == s->p);
+-}
+-
+-#endif
+-
+-static void
+-string_append (string *p, const char *s)
+-{
+-  int n;
+-  if (s == NULL || *s == '\0')
+-    return;
+-  n = strlen (s);
+-  string_need (p, n);
+-  memcpy (p->p, s, n);
+-  p->p += n;
+-}
+-
+-static void
+-string_appends (string *p, string *s)
+-{
+-  int n;
+-
+-  if (s->b != s->p)
+-    {
+-      n = s->p - s->b;
+-      string_need (p, n);
+-      memcpy (p->p, s->b, n);
+-      p->p += n;
+-    }
+-}
+-
+-static void
+-string_appendn (string *p, const char *s, int n)
+-{
+-  if (n != 0)
+-    {
+-      string_need (p, n);
+-      memcpy (p->p, s, n);
+-      p->p += n;
+-    }
+-}
+-
+-static void
+-string_prepend (string *p, const char *s)
+-{
+-  if (s != NULL && *s != '\0')
+-    {
+-      string_prependn (p, s, strlen (s));
+-    }
+-}
+-
+-static void
+-string_prepends (string *p, string *s)
+-{
+-  if (s->b != s->p)
+-    {
+-      string_prependn (p, s->b, s->p - s->b);
+-    }
+-}
+-
+-static void
+-string_prependn (string *p, const char *s, int n)
+-{
+-  char *q;
+-
+-  if (n != 0)
+-    {
+-      string_need (p, n);
+-      for (q = p->p - 1; q >= p->b; q--)
+-	{
+-	  q[n] = q[0];
+-	}
+-      memcpy (p->b, s, n);
+-      p->p += n;
+-    }
+-}
+-
+-static void
+-string_append_template_idx (string *s, int idx)
+-{
+-  char buf[INTBUF_SIZE + 1 /* 'T' */];
+-  sprintf(buf, "T%d", idx);
+-  string_append (s, buf);
+-}
+--- libiberty/testsuite/demangle-expected
++++ libiberty/testsuite/demangle-expected
+@@ -20,3315 +20,64 @@
+ # A line starting with `#' is ignored.
+ # However, blank lines in this file are NOT ignored.
+ #
+---format=gnu --no-params
+-AddAlignment__9ivTSolverUiP12ivInteractorP7ivTGlue
+-ivTSolver::AddAlignment(unsigned int, ivInteractor *, ivTGlue *)
+-ivTSolver::AddAlignment
+-#
+---format=gnu --no-params
+-ArrowheadIntersects__9ArrowLineP9ArrowheadR6BoxObjP7Graphic
+-ArrowLine::ArrowheadIntersects(Arrowhead *, BoxObj &, Graphic *)
+-ArrowLine::ArrowheadIntersects
+-#
+---format=gnu --no-params
+-ArrowheadIntersects__9ArrowLineP9ArrowheadO6BoxObjP7Graphic
+-ArrowLine::ArrowheadIntersects(Arrowhead *, BoxObj &&, Graphic *)
+-ArrowLine::ArrowheadIntersects
+-#
+---format=gnu --no-params
+-AtEnd__13ivRubberGroup
+-ivRubberGroup::AtEnd(void)
+-ivRubberGroup::AtEnd
+-#
+---format=gnu --no-params
+-BgFilter__9ivTSolverP12ivInteractor
+-ivTSolver::BgFilter(ivInteractor *)
+-ivTSolver::BgFilter
+-#
+---format=gnu --no-params
+-Check__6UArrayi
+-UArray::Check(int)
+-UArray::Check
+-#
+---format=gnu --no-params
+-CoreConstDecls__8TextCodeR7ostream
+-TextCode::CoreConstDecls(ostream &)
+-TextCode::CoreConstDecls
+-#
+---format=gnu --no-params
+-CoreConstDecls__8TextCodeO7ostream
+-TextCode::CoreConstDecls(ostream &&)
+-TextCode::CoreConstDecls
+-#
+---format=gnu --no-params
+-Detach__8StateVarP12StateVarView
+-StateVar::Detach(StateVarView *)
+-StateVar::Detach
+-#
+---format=gnu --no-params
+-Done__9ComponentG8Iterator
+-Component::Done(Iterator)
+-Component::Done
+-#
+---format=gnu --no-params
+-Effect__11RelateManipR7ivEvent
+-RelateManip::Effect(ivEvent &)
+-RelateManip::Effect
+-#
+---format=gnu --no-params
+-Effect__11RelateManipO7ivEvent
+-RelateManip::Effect(ivEvent &&)
+-RelateManip::Effect
+-#
+---format=gnu --no-params
+-FindFixed__FRP4CNetP4CNet
+-FindFixed(CNet *&, CNet *)
+-FindFixed
+-#
+---format=gnu --no-params
+-FindFixed__FOP4CNetP4CNet
+-FindFixed(CNet *&&, CNet *)
+-FindFixed
+-#
+---format=gnu --no-params
+-Fix48_abort__FR8twolongs
+-Fix48_abort(twolongs &)
+-Fix48_abort
+-#
+---format=gnu --no-params
+-Fix48_abort__FO8twolongs
+-Fix48_abort(twolongs &&)
+-Fix48_abort
+-#
+---format=gnu --no-params
+-GetBarInfo__15iv2_6_VScrollerP13ivPerspectiveRiT2
+-iv2_6_VScroller::GetBarInfo(ivPerspective *, int &, int &)
+-iv2_6_VScroller::GetBarInfo
+-#
+---format=gnu --no-params
+-GetBarInfo__15iv2_6_VScrollerP13ivPerspectiveOiT2
+-iv2_6_VScroller::GetBarInfo(ivPerspective *, int &&, int &&)
+-iv2_6_VScroller::GetBarInfo
+-#
+---format=gnu --no-params
+-GetBgColor__C9ivPainter
+-ivPainter::GetBgColor(void) const
+-ivPainter::GetBgColor
+-#
+---format=gnu --no-params
+-InsertBody__15H_PullrightMenuii
+-H_PullrightMenu::InsertBody(int, int)
+-H_PullrightMenu::InsertBody
+-#
+---format=gnu --no-params
+-InsertCharacter__9TextManipc
+-TextManip::InsertCharacter(char)
+-TextManip::InsertCharacter
+-#
+---format=gnu --no-params
+-InsertToplevel__7ivWorldP12ivInteractorT1
+-ivWorld::InsertToplevel(ivInteractor *, ivInteractor *)
+-ivWorld::InsertToplevel
+-#
+---format=gnu --no-params
+-InsertToplevel__7ivWorldP12ivInteractorT1iiUi
+-ivWorld::InsertToplevel(ivInteractor *, ivInteractor *, int, int, unsigned int)
+-ivWorld::InsertToplevel
+-#
+---format=gnu --no-params
+-IsAGroup__FP11GraphicViewP11GraphicComp
+-IsAGroup(GraphicView *, GraphicComp *)
+-IsAGroup
+-#
+---format=gnu --no-params
+-IsA__10ButtonCodeUl
+-ButtonCode::IsA(unsigned long)
+-ButtonCode::IsA
+-#
+---format=gnu --no-params
+-ReadName__FR7istreamPc
+-ReadName(istream &, char *)
+-ReadName
+-#
+---format=gnu --no-params
+-Redraw__13StringBrowseriiii
+-StringBrowser::Redraw(int, int, int, int)
+-StringBrowser::Redraw
+-#
+---format=gnu --no-params
+-Rotate__13ivTransformerf
+-ivTransformer::Rotate(float)
+-ivTransformer::Rotate
+-#
+---format=gnu --no-params
+-Rotated__C13ivTransformerf
+-ivTransformer::Rotated(float) const
+-ivTransformer::Rotated
+-#
+---format=gnu --no-params
+-Round__Ff
+-Round(float)
+-Round
+-#
+---format=gnu --no-params
+-SetExport__16MemberSharedNameUi
+-MemberSharedName::SetExport(unsigned int)
+-MemberSharedName::SetExport
+-#
+---format=gnu --no-params
+-Set__14ivControlState13ControlStatusUi
+-ivControlState::Set(ControlStatus, unsigned int)
+-ivControlState::Set
+-#
+---format=gnu --no-params
+-Set__5DFacePcii
+-DFace::Set(char *, int, int)
+-DFace::Set
+-#
+---format=gnu --no-params
+-VConvert__9ivTSolverP12ivInteractorRP8TElementT2
+-ivTSolver::VConvert(ivInteractor *, TElement *&, TElement *&)
+-ivTSolver::VConvert
+-#
+---format=gnu --no-params
+-VConvert__9ivTSolverP7ivTGlueRP8TElement
+-ivTSolver::VConvert(ivTGlue *, TElement *&)
+-ivTSolver::VConvert
+-#
+---format=gnu --no-params
+-VOrder__9ivTSolverUiRP12ivInteractorT2
+-ivTSolver::VOrder(unsigned int, ivInteractor *&, ivInteractor *&)
+-ivTSolver::VOrder
+-#
+---format=gnu --no-params
+-_10PageButton$__both
+-PageButton::__both
+-PageButton::__both
+-#
+---format=gnu --no-params
+-_3RNG$singleMantissa
+-RNG::singleMantissa
+-RNG::singleMantissa
+-#
+---format=gnu --no-params
+-_5IComp$_release
+-IComp::_release
+-IComp::_release
+-#
+---format=gnu --no-params
+-_$_10BitmapComp
+-BitmapComp::~BitmapComp(void)
+-BitmapComp::~BitmapComp
+-#
+---format=gnu --no-params
+-_$_9__io_defs
+-__io_defs::~__io_defs(void)
+-__io_defs::~__io_defs
+-#
+---format=gnu --no-params
+-_$_Q23foo3bar
+-foo::bar::~bar(void)
+-foo::bar::~bar
+-#
+---format=gnu --no-params
+-_$_Q33foo3bar4bell
+-foo::bar::bell::~bell(void)
+-foo::bar::bell::~bell
+-#
+---format=gnu --no-params
+-__10ivTelltaleiP7ivGlyph
+-ivTelltale::ivTelltale(int, ivGlyph *)
+-ivTelltale::ivTelltale
+-#
+---format=gnu --no-params
+-__10ivViewportiP12ivInteractorUi
+-ivViewport::ivViewport(int, ivInteractor *, unsigned int)
+-ivViewport::ivViewport
+-#
+---format=gnu --no-params
+-__10ostrstream
+-ostrstream::ostrstream(void)
+-ostrstream::ostrstream
+-#
+---format=gnu --no-params
+-__10ostrstreamPcii
+-ostrstream::ostrstream(char *, int, int)
+-ostrstream::ostrstream
+-#
+---format=gnu --no-params
+-__11BitmapTablei
+-BitmapTable::BitmapTable(int)
+-BitmapTable::BitmapTable
+-#
+---format=gnu --no-params
+-__12ViewportCodeP12ViewportComp
+-ViewportCode::ViewportCode(ViewportComp *)
+-ViewportCode::ViewportCode
+-#
+---format=gnu --no-params
+-__12iv2_6_Borderii
+-iv2_6_Border::iv2_6_Border(int, int)
+-iv2_6_Border::iv2_6_Border
+-#
+---format=gnu --no-params
+-__12ivBreak_Listl
+-ivBreak_List::ivBreak_List(long)
+-ivBreak_List::ivBreak_List
+-#
+---format=gnu --no-params
+-__14iv2_6_MenuItemiP12ivInteractor
+-iv2_6_MenuItem::iv2_6_MenuItem(int, ivInteractor *)
+-iv2_6_MenuItem::iv2_6_MenuItem
+-#
+---format=gnu --no-params
+-__20DisplayList_IteratorR11DisplayList
+-DisplayList_Iterator::DisplayList_Iterator(DisplayList &)
+-DisplayList_Iterator::DisplayList_Iterator
+-#
+---format=gnu --no-params
+-__3fooRT0
+-foo::foo(foo &)
+-foo::foo
+-#
+---format=gnu --no-params
+-__3fooiN31
+-foo::foo(int, int, int, int)
+-foo::foo
+-#
+---format=gnu --no-params
+-__3fooiRT0iT2iT2
+-foo::foo(int, foo &, int, foo &, int, foo &)
+-foo::foo
+-#
+---format=gnu --no-params
+-__6KeyMapPT0
+-KeyMap::KeyMap(KeyMap *)
+-KeyMap::KeyMap
+-#
+---format=gnu --no-params
+-__8ArrowCmdP6EditorUiUi
+-ArrowCmd::ArrowCmd(Editor *, unsigned int, unsigned int)
+-ArrowCmd::ArrowCmd
+-#
+---format=gnu --no-params
+-__9F_EllipseiiiiP7Graphic
+-F_Ellipse::F_Ellipse(int, int, int, int, Graphic *)
+-F_Ellipse::F_Ellipse
+-#
+---format=gnu --no-params
+-__9FrameDataP9FrameCompi
+-FrameData::FrameData(FrameComp *, int)
+-FrameData::FrameData
+-#
+---format=gnu --no-params
+-__9HVGraphicP9CanvasVarP7Graphic
+-HVGraphic::HVGraphic(CanvasVar *, Graphic *)
+-HVGraphic::HVGraphic
+-#
+---format=gnu --no-params
+-__Q23foo3bar
+-foo::bar::bar(void)
+-foo::bar::bar
+-#
+---format=gnu --no-params
+-__Q33foo3bar4bell
+-foo::bar::bell::bell(void)
+-foo::bar::bell::bell
+-#
+---format=gnu --no-params
+-__aa__3fooRT0
+-foo::operator&&(foo &)
+-foo::operator&&
+-#
+---format=gnu --no-params
+-__aad__3fooRT0
+-foo::operator&=(foo &)
+-foo::operator&=
+-#
+---format=gnu --no-params
+-__ad__3fooRT0
+-foo::operator&(foo &)
+-foo::operator&
+-#
+---format=gnu --no-params
+-__adv__3fooRT0
+-foo::operator/=(foo &)
+-foo::operator/=
+-#
+---format=gnu --no-params
+-__aer__3fooRT0
+-foo::operator^=(foo &)
+-foo::operator^=
+-#
+---format=gnu --no-params
+-__als__3fooRT0
+-foo::operator<<=(foo &)
+-foo::operator<<=
+-#
+---format=gnu --no-params
+-__amd__3fooRT0
+-foo::operator%=(foo &)
+-foo::operator%=
+-#
+---format=gnu --no-params
+-__ami__3fooRT0
+-foo::operator-=(foo &)
+-foo::operator-=
+-#
+---format=gnu --no-params
+-__aml__3FixRT0
+-Fix::operator*=(Fix &)
+-Fix::operator*=
+-#
+---format=gnu --no-params
+-__aml__5Fix16i
+-Fix16::operator*=(int)
+-Fix16::operator*=
+-#
+---format=gnu --no-params
+-__aml__5Fix32RT0
+-Fix32::operator*=(Fix32 &)
+-Fix32::operator*=
+-#
+---format=gnu --no-params
+-__aor__3fooRT0
+-foo::operator|=(foo &)
+-foo::operator|=
+-#
+---format=gnu --no-params
+-__apl__3fooRT0
+-foo::operator+=(foo &)
+-foo::operator+=
+-#
+---format=gnu --no-params
+-__ars__3fooRT0
+-foo::operator>>=(foo &)
+-foo::operator>>=
+-#
+---format=gnu --no-params
+-__as__3fooRT0
+-foo::operator=(foo &)
+-foo::operator=
+-#
+---format=gnu --no-params
+-__cl__3fooRT0
+-foo::operator()(foo &)
+-foo::operator()
+-#
+---format=gnu --no-params
+-__cl__6Normal
+-Normal::operator()(void)
+-Normal::operator()
+-#
+---format=gnu --no-params
+-__cl__6Stringii
+-String::operator()(int, int)
+-String::operator()
+-#
+---format=gnu --no-params
+-__cm__3fooRT0
+-foo::operator, (foo &)
+-foo::operator, 
+-#
+---format=gnu --no-params
+-__co__3foo
+-foo::operator~(void)
+-foo::operator~
+-#
+---format=gnu --no-params
+-__dl__3fooPv
+-foo::operator delete(void *)
+-foo::operator delete
+-#
+---format=gnu --no-params
+-__dv__3fooRT0
+-foo::operator/(foo &)
+-foo::operator/
+-#
+---format=gnu --no-params
+-__eq__3fooRT0
+-foo::operator==(foo &)
+-foo::operator==
+-#
+---format=gnu --no-params
+-__er__3fooRT0
+-foo::operator^(foo &)
+-foo::operator^
+-#
+---format=gnu --no-params
+-__ge__3fooRT0
+-foo::operator>=(foo &)
+-foo::operator>=
+-#
+---format=gnu --no-params
+-__gt__3fooRT0
+-foo::operator>(foo &)
+-foo::operator>
+-#
+---format=gnu --no-params
+-__le__3fooRT0
+-foo::operator<=(foo &)
+-foo::operator<=
+-#
+---format=gnu --no-params
+-__ls__3fooRT0
+-foo::operator<<(foo &)
+-foo::operator<<
+-#
+---format=gnu --no-params
+-__ls__FR7ostreamPFR3ios_R3ios
+-operator<<(ostream &, ios &(*)(ios &))
+-operator<<
+-#
+---format=gnu --no-params
+-__ls__FR7ostreamR3Fix
+-operator<<(ostream &, Fix &)
+-operator<<
+-#
+---format=gnu --no-params
+-__lt__3fooRT0
+-foo::operator<(foo &)
+-foo::operator<
+-#
+---format=gnu --no-params
+-__md__3fooRT0
+-foo::operator%(foo &)
+-foo::operator%
+-#
+---format=gnu --no-params
+-__mi__3fooRT0
+-foo::operator-(foo &)
+-foo::operator-
+-#
+---format=gnu --no-params
+-__ml__3fooRT0
+-foo::operator*(foo &)
+-foo::operator*
+-#
+---format=gnu --no-params
+-__mm__3fooi
+-foo::operator--(int)
+-foo::operator--
+-#
+---format=gnu --no-params
+-__ne__3fooRT0
+-foo::operator!=(foo &)
+-foo::operator!=
+-#
+---format=gnu --no-params
+-__nt__3foo
+-foo::operator!(void)
+-foo::operator!
+-#
+---format=gnu --no-params
+-__nw__3fooi
+-foo::operator new(int)
+-foo::operator new
+-#
+---format=gnu --no-params
+-__oo__3fooRT0
+-foo::operator||(foo &)
+-foo::operator||
+-#
+---format=gnu --no-params
+-__opPc__3foo
+-foo::operator char *(void)
+-foo::operator char *
+-#
+---format=gnu --no-params
+-__opi__3foo
+-foo::operator int(void)
+-foo::operator int
+-#
+---format=gnu --no-params
+-__or__3fooRT0
+-foo::operator|(foo &)
+-foo::operator|
+-#
+---format=gnu --no-params
+-__pl__3fooRT0
+-foo::operator+(foo &)
+-foo::operator+
+-#
+---format=gnu --no-params
+-__pp__3fooi
+-foo::operator++(int)
+-foo::operator++
+-#
+---format=gnu --no-params
+-__rf__3foo
+-foo::operator->(void)
+-foo::operator->
+-#
+---format=gnu --no-params
+-__rm__3fooRT0
+-foo::operator->*(foo &)
+-foo::operator->*
+-#
+---format=gnu --no-params
+-__rs__3fooRT0
+-foo::operator>>(foo &)
+-foo::operator>>
+-#
+---format=gnu --no-params
+-_new_Fix__FUs
+-_new_Fix(unsigned short)
+-_new_Fix
+-#
+---format=gnu --no-params
+-_vt.foo
+-foo virtual table
+-foo virtual table
+-#
+---format=gnu --no-params
+-_vt.foo.bar
+-foo::bar virtual table
+-foo::bar virtual table
+-#
+---format=gnu --no-params
+-_vt$foo
+-foo virtual table
+-foo virtual table
+-#
+---format=gnu --no-params
+-_vt$foo$bar
+-foo::bar virtual table
+-foo::bar virtual table
+-#
+---format=gnu --no-params
+-append__7ivGlyphPT0
+-ivGlyph::append(ivGlyph *)
+-ivGlyph::append
+-#
+---format=gnu --no-params
+-clearok__FP7_win_sti
+-clearok(_win_st *, int)
+-clearok
+-#
+---format=gnu --no-params
+-complexfunc2__FPFPc_i
+-complexfunc2(int (*)(char *))
+-complexfunc2
+-#
+---format=gnu --no-params
+-complexfunc3__FPFPFPl_s_i
+-complexfunc3(int (*)(short (*)(long *)))
+-complexfunc3
+-#
+---format=gnu --no-params
+-complexfunc4__FPFPFPc_s_i
+-complexfunc4(int (*)(short (*)(char *)))
+-complexfunc4
+-#
+---format=gnu --no-params
+-complexfunc5__FPFPc_PFl_i
+-complexfunc5(int (*(*)(char *))(long))
+-complexfunc5
+-#
+---format=gnu --no-params
+-complexfunc6__FPFPi_PFl_i
+-complexfunc6(int (*(*)(int *))(long))
+-complexfunc6
+-#
+---format=gnu --no-params
+-complexfunc7__FPFPFPc_i_PFl_i
+-complexfunc7(int (*(*)(int (*)(char *)))(long))
+-complexfunc7
+-#
+---format=gnu --no-params
+-foo__FiN30
+-foo(int, int, int, int)
+-foo
+-#
+---format=gnu --no-params
+-foo__FiR3fooiT1iT1
+-foo(int, foo &, int, foo &, int, foo &)
+-foo
+-#
+---format=gnu --no-params
+-foo___3barl
+-bar::foo_(long)
+-bar::foo_
+-#
+---format=gnu --no-params
+-insert__15ivClippingStacklRP8_XRegion
+-ivClippingStack::insert(long, _XRegion *&)
+-ivClippingStack::insert
+-#
+---format=gnu --no-params
+-insert__16ChooserInfo_ListlR11ChooserInfo
+-ChooserInfo_List::insert(long, ChooserInfo &)
+-ChooserInfo_List::insert
+-#
+---format=gnu --no-params
+-insert__17FontFamilyRepListlRP15ivFontFamilyRep
+-FontFamilyRepList::insert(long, ivFontFamilyRep *&)
+-FontFamilyRepList::insert
+-#
+---format=gnu --no-params
+-leaveok__FP7_win_stc
+-leaveok(_win_st *, char)
+-leaveok
+-#
+---format=gnu --no-params
+-left_mover__C7ivMFKitP12ivAdjustableP7ivStyle
+-ivMFKit::left_mover(ivAdjustable *, ivStyle *) const
+-ivMFKit::left_mover
+-#
+---format=gnu --no-params
+-overload1arg__FSc
+-overload1arg(signed char)
+-overload1arg
+-#
+---format=gnu --no-params
+-overload1arg__FUc
+-overload1arg(unsigned char)
+-overload1arg
+-#
+---format=gnu --no-params
+-overload1arg__FUi
+-overload1arg(unsigned int)
+-overload1arg
+-#
+---format=gnu --no-params
+-overload1arg__FUl
+-overload1arg(unsigned long)
+-overload1arg
+-#
+---format=gnu --no-params
+-overload1arg__FUs
+-overload1arg(unsigned short)
+-overload1arg
+-#
+---format=gnu --no-params
+-overload1arg__Fc
+-overload1arg(char)
+-overload1arg
+-#
+---format=gnu --no-params
+-overload1arg__Fd
+-overload1arg(double)
+-overload1arg
+-#
+---format=gnu --no-params
+-overload1arg__Ff
+-overload1arg(float)
+-overload1arg
+-#
+---format=gnu --no-params
+-overload1arg__Fi
+-overload1arg(int)
+-overload1arg
+-#
+---format=gnu --no-params
+-overload1arg__Fl
+-overload1arg(long)
+-overload1arg
+-#
+---format=gnu --no-params
+-overload1arg__Fs
+-overload1arg(short)
+-overload1arg
+-#
+---format=gnu --no-params
+-overload1arg__Fv
+-overload1arg(void)
+-overload1arg
+-#
+---format=gnu --no-params
+-overloadargs__Fi
+-overloadargs(int)
+-overloadargs
+-#
+---format=gnu --no-params
+-overloadargs__Fii
+-overloadargs(int, int)
+-overloadargs
+-#
+---format=gnu --no-params
+-overloadargs__Fiii
+-overloadargs(int, int, int)
+-overloadargs
+-#
+---format=gnu --no-params
+-overloadargs__Fiiii
+-overloadargs(int, int, int, int)
+-overloadargs
+-#
+---format=gnu --no-params
+-overloadargs__Fiiiii
+-overloadargs(int, int, int, int, int)
+-overloadargs
+-#
+---format=gnu --no-params
+-overloadargs__Fiiiiii
+-overloadargs(int, int, int, int, int, int)
+-overloadargs
+-#
+---format=gnu --no-params
+-overloadargs__Fiiiiiii
+-overloadargs(int, int, int, int, int, int, int)
+-overloadargs
+-#
+---format=gnu --no-params
+-overloadargs__Fiiiiiiii
+-overloadargs(int, int, int, int, int, int, int, int)
+-overloadargs
+-#
+---format=gnu --no-params
+-overloadargs__Fiiiiiiiii
+-overloadargs(int, int, int, int, int, int, int, int, int)
+-overloadargs
+-#
+---format=gnu --no-params
+-overloadargs__Fiiiiiiiiii
+-overloadargs(int, int, int, int, int, int, int, int, int, int)
+-overloadargs
+-#
+---format=gnu --no-params
+-overloadargs__Fiiiiiiiiiii
+-overloadargs(int, int, int, int, int, int, int, int, int, int, int)
+-overloadargs
+-#
+---format=gnu --no-params
+-poke__8ivRasterUlUlffff
+-ivRaster::poke(unsigned long, unsigned long, float, float, float, float)
+-ivRaster::poke
+-#
+---format=gnu --no-params
+-polar__Fdd
+-polar(double, double)
+-polar
+-#
+---format=gnu --no-params
+-scale__13ivTransformerff
+-ivTransformer::scale(float, float)
+-ivTransformer::scale
+-#
+---format=gnu --no-params
+-sgetn__7filebufPci
+-filebuf::sgetn(char *, int)
+-filebuf::sgetn
+-#
+---format=gnu --no-params
+-shift__FP5_FrepiT0
+-shift(_Frep *, int, _Frep *)
+-shift
+-#
+---format=gnu --no-params
+-test__C6BitSeti
+-BitSet::test(int) const
+-BitSet::test
+-#
+---format=gnu --no-params
+-test__C6BitSetii
+-BitSet::test(int, int) const
+-BitSet::test
+-#
+---format=gnu --no-params
+-text_source__8Documentl
+-Document::text_source(long)
+-Document::text_source
+-#
+---format=gnu --no-params
+-variance__6Erlangd
+-Erlang::variance(double)
+-Erlang::variance
+-#
+---format=gnu --no-params
+-view__14DocumentViewerP8ItemViewP11TabularItem
+-DocumentViewer::view(ItemView *, TabularItem *)
+-DocumentViewer::view
+-#
+---format=gnu --no-params
+-xy_extents__11ivExtensionffff
+-ivExtension::xy_extents(float, float, float, float)
+-ivExtension::xy_extents
+-#
+---format=gnu --no-params
+-zero__8osMemoryPvUi
+-osMemory::zero(void *, unsigned int)
+-osMemory::zero
+-#
+---format=gnu --no-params
+-_2T4$N
+-T4::N
+-T4::N
+-#
+---format=gnu --no-params
+-_Q22T42t1$N
+-T4::t1::N
+-T4::t1::N
+-#
+---format=gnu --no-params
+-get__2T1
+-T1::get(void)
+-T1::get
+-#
+---format=gnu --no-params
+-get__Q22T11a
+-T1::a::get(void)
+-T1::a::get
+-#
+---format=gnu --no-params
+-get__Q32T11a1b
+-T1::a::b::get(void)
+-T1::a::b::get
+-#
+---format=gnu --no-params
+-get__Q42T11a1b1c
+-T1::a::b::c::get(void)
+-T1::a::b::c::get
+-#
+---format=gnu --no-params
+-get__Q52T11a1b1c1d
+-T1::a::b::c::d::get(void)
+-T1::a::b::c::d::get
+-#
+---format=gnu --no-params
+-put__2T1i
+-T1::put(int)
+-T1::put
+-#
+---format=gnu --no-params
+-put__Q22T11ai
+-T1::a::put(int)
+-T1::a::put
+-#
+---format=gnu --no-params
+-put__Q32T11a1bi
+-T1::a::b::put(int)
+-T1::a::b::put
+-#
+---format=gnu --no-params
+-put__Q42T11a1b1ci
+-T1::a::b::c::put(int)
+-T1::a::b::c::put
+-#
+---format=gnu --no-params
+-put__Q52T11a1b1c1di
+-T1::a::b::c::d::put(int)
+-T1::a::b::c::d::put
+-#
+---format=gnu --no-params
+-bar__3fooPv
+-foo::bar(void *)
+-foo::bar
+-#
+---format=gnu --no-params
+-bar__C3fooPv
+-foo::bar(void *) const
+-foo::bar
+-#
+---format=gnu --no-params
+-__eq__3fooRT0
+-foo::operator==(foo &)
+-foo::operator==
+-#
+---format=gnu --no-params
+-__eq__C3fooR3foo
+-foo::operator==(foo &) const
+-foo::operator==
+-#
+---format=gnu --no-params
+-elem__t6vector1Zdi
+-vector<double>::elem(int)
+-vector<double>::elem
+-#
+---format=gnu --no-params
+-elem__t6vector1Zii
+-vector<int>::elem(int)
+-vector<int>::elem
+-#
+---format=gnu --no-params
+-__t6vector1Zdi
+-vector<double>::vector(int)
+-vector<double>::vector
+-#
+---format=gnu --no-params
+-__t6vector1Zii
+-vector<int>::vector(int)
+-vector<int>::vector
+-#
+---format=gnu --no-params
+-_$_t6vector1Zdi
+-vector<double>::~vector(int)
+-vector<double>::~vector
+-#
+---format=gnu --no-params
+-_$_t6vector1Zii
+-vector<int>::~vector(int)
+-vector<int>::~vector
+-#
+---format=gnu --no-params
+-__nw__t2T11ZcUi
+-T1<char>::operator new(unsigned int)
+-T1<char>::operator new
+-#
+---format=gnu --no-params
+-__nw__t2T11Z1tUi
+-T1<t>::operator new(unsigned int)
+-T1<t>::operator new
+-#
+---format=gnu --no-params
+-__dl__t2T11ZcPv
+-T1<char>::operator delete(void *)
+-T1<char>::operator delete
+-#
+---format=gnu --no-params
+-__dl__t2T11Z1tPv
+-T1<t>::operator delete(void *)
+-T1<t>::operator delete
+-#
+---format=gnu --no-params
+-__t2T11Zci
+-T1<char>::T1(int)
+-T1<char>::T1
+-#
+---format=gnu --no-params
+-__t2T11Zc
+-T1<char>::T1(void)
+-T1<char>::T1
+-#
+---format=gnu --no-params
+-__t2T11Z1ti
+-T1<t>::T1(int)
+-T1<t>::T1
+-#
+---format=gnu --no-params
+-__t2T11Z1t
+-T1<t>::T1(void)
+-T1<t>::T1
+-#
+---format=gnu --no-params
+-__Q2t4List1Z10VHDLEntity3Pix
+-List<VHDLEntity>::Pix::Pix(void)
+-List<VHDLEntity>::Pix::Pix
+-#
+---format=gnu --no-params
+-__Q2t4List1Z10VHDLEntity3PixPQ2t4List1Z10VHDLEntity7element
+-List<VHDLEntity>::Pix::Pix(List<VHDLEntity>::element *)
+-List<VHDLEntity>::Pix::Pix
+-#
+---format=gnu --no-params
+-__Q2t4List1Z10VHDLEntity3PixRCQ2t4List1Z10VHDLEntity3Pix
+-List<VHDLEntity>::Pix::Pix(List<VHDLEntity>::Pix const &)
+-List<VHDLEntity>::Pix::Pix
+-#
+---format=gnu --no-params
+-__Q2t4List1Z10VHDLEntity3PixOCQ2t4List1Z10VHDLEntity3Pix
+-List<VHDLEntity>::Pix::Pix(List<VHDLEntity>::Pix const &&)
+-List<VHDLEntity>::Pix::Pix
+-#
+---format=gnu --no-params
+-__Q2t4List1Z10VHDLEntity7elementRC10VHDLEntityPT0
+-List<VHDLEntity>::element::element(VHDLEntity const &, List<VHDLEntity>::element *)
+-List<VHDLEntity>::element::element
+-#
+---format=gnu --no-params
+-__Q2t4List1Z10VHDLEntity7elementOC10VHDLEntityPT0
+-List<VHDLEntity>::element::element(VHDLEntity const &&, List<VHDLEntity>::element *)
+-List<VHDLEntity>::element::element
+-#
+---format=gnu --no-params
+-__Q2t4List1Z10VHDLEntity7elementRCQ2t4List1Z10VHDLEntity7element
+-List<VHDLEntity>::element::element(List<VHDLEntity>::element const &)
+-List<VHDLEntity>::element::element
+-#
+---format=gnu --no-params
+-__cl__C11VHDLLibraryGt4PixX3Z11VHDLLibraryZ14VHDLLibraryRepZt4List1Z10VHDLEntity
+-VHDLLibrary::operator()(PixX<VHDLLibrary, VHDLLibraryRep, List<VHDLEntity> >) const
+-VHDLLibrary::operator()
+-#
+---format=gnu --no-params
+-__cl__Ct4List1Z10VHDLEntityRCQ2t4List1Z10VHDLEntity3Pix
+-List<VHDLEntity>::operator()(List<VHDLEntity>::Pix const &) const
+-List<VHDLEntity>::operator()
+-#
+---format=gnu --no-params
+-__ne__FPvRCQ2t4List1Z10VHDLEntity3Pix
+-operator!=(void *, List<VHDLEntity>::Pix const &)
+-operator!=
+-#
+---format=gnu --no-params
+-__ne__FPvRCt4PixX3Z11VHDLLibraryZ14VHDLLibraryRepZt4List1Z10VHDLEntity
+-operator!=(void *, PixX<VHDLLibrary, VHDLLibraryRep, List<VHDLEntity> > const &)
+-operator!=
+-#
+---format=gnu --no-params
+-__t4List1Z10VHDLEntityRCt4List1Z10VHDLEntity
+-List<VHDLEntity>::List(List<VHDLEntity> const &)
+-List<VHDLEntity>::List
+-#
+---format=gnu --no-params
+-__t4PixX3Z11VHDLLibraryZ14VHDLLibraryRepZt4List1Z10VHDLEntity
+-PixX<VHDLLibrary, VHDLLibraryRep, List<VHDLEntity> >::PixX(void)
+-PixX<VHDLLibrary, VHDLLibraryRep, List<VHDLEntity> >::PixX
+-#
+---format=gnu --no-params
+-__t4PixX3Z11VHDLLibraryZ14VHDLLibraryRepZt4List1Z10VHDLEntityP14VHDLLibraryRepGQ2t4List1Z10VHDLEntity3Pix
+-PixX<VHDLLibrary, VHDLLibraryRep, List<VHDLEntity> >::PixX(VHDLLibraryRep *, List<VHDLEntity>::Pix)
+-PixX<VHDLLibrary, VHDLLibraryRep, List<VHDLEntity> >::PixX
+-#
+---format=gnu --no-params
+-__t4PixX3Z11VHDLLibraryZ14VHDLLibraryRepZt4List1Z10VHDLEntityRCt4PixX3Z11VHDLLibraryZ14VHDLLibraryRepZt4List1Z10VHDLEntity
+-PixX<VHDLLibrary, VHDLLibraryRep, List<VHDLEntity> >::PixX(PixX<VHDLLibrary, VHDLLibraryRep, List<VHDLEntity> > const &)
+-PixX<VHDLLibrary, VHDLLibraryRep, List<VHDLEntity> >::PixX
+-#
+---format=gnu --no-params
+-__t4PixX3Z11VHDLLibraryZ14VHDLLibraryRepZt4List1Z10VHDLEntityOCt4PixX3Z11VHDLLibraryZ14VHDLLibraryRepZt4List1Z10VHDLEntity
+-PixX<VHDLLibrary, VHDLLibraryRep, List<VHDLEntity> >::PixX(PixX<VHDLLibrary, VHDLLibraryRep, List<VHDLEntity> > const &&)
+-PixX<VHDLLibrary, VHDLLibraryRep, List<VHDLEntity> >::PixX
+-#
+---format=gnu --no-params
+-nextE__C11VHDLLibraryRt4PixX3Z11VHDLLibraryZ14VHDLLibraryRepZt4List1Z10VHDLEntity
+-VHDLLibrary::nextE(PixX<VHDLLibrary, VHDLLibraryRep, List<VHDLEntity> > &) const
+-VHDLLibrary::nextE
+-#
+---format=gnu --no-params
+-next__Ct4List1Z10VHDLEntityRQ2t4List1Z10VHDLEntity3Pix
+-List<VHDLEntity>::next(List<VHDLEntity>::Pix &) const
+-List<VHDLEntity>::next
+-#
+---format=gnu --no-params
+-_GLOBAL_$D$set
+-global destructors keyed to set
+-global destructors keyed to set
+-#
+---format=gnu --no-params
+-_GLOBAL_$I$set
+-global constructors keyed to set
+-global constructors keyed to set
+-#
+---format=gnu --no-params
+-__as__t5ListS1ZUiRCt5ListS1ZUi
+-ListS<unsigned int>::operator=(ListS<unsigned int> const &)
+-ListS<unsigned int>::operator=
+-#
+---format=gnu --no-params
+-__cl__Ct5ListS1ZUiRCQ2t5ListS1ZUi3Vix
+-ListS<unsigned int>::operator()(ListS<unsigned int>::Vix const &) const
+-ListS<unsigned int>::operator()
+-#
+---format=gnu --no-params
+-__cl__Ct5SetLS1ZUiRCQ2t5SetLS1ZUi3Vix
+-SetLS<unsigned int>::operator()(SetLS<unsigned int>::Vix const &) const
+-SetLS<unsigned int>::operator()
+-#
+---format=gnu --no-params
+-__t10ListS_link1ZUiRCUiPT0
+-ListS_link<unsigned int>::ListS_link(unsigned int const &, ListS_link<unsigned int> *)
+-ListS_link<unsigned int>::ListS_link
+-#
+---format=gnu --no-params
+-__t10ListS_link1ZUiRCt10ListS_link1ZUi
+-ListS_link<unsigned int>::ListS_link(ListS_link<unsigned int> const &)
+-ListS_link<unsigned int>::ListS_link
+-#
+---format=gnu --no-params
+-__t5ListS1ZUiRCt5ListS1ZUi
+-ListS<unsigned int>::ListS(ListS<unsigned int> const &)
+-ListS<unsigned int>::ListS
+-#
+---format=gnu --no-params
+-next__Ct5ListS1ZUiRQ2t5ListS1ZUi3Vix
+-ListS<unsigned int>::next(ListS<unsigned int>::Vix &) const
+-ListS<unsigned int>::next
+-#
+---format=gnu --no-params
+-__ne__FPvRCQ2t5SetLS1ZUi3Vix
+-operator!=(void *, SetLS<unsigned int>::Vix const &)
+-operator!=
+-#
+---format=gnu --no-params
+-__t8ListElem1Z5LabelRt4List1Z5Label
+-ListElem<Label>::ListElem(List<Label> &)
+-ListElem<Label>::ListElem
+-#
+---format=gnu --no-params
+-__t8BDDHookV1ZPcRCPc
+-BDDHookV<char *>::BDDHookV(char *const &)
+-BDDHookV<char *>::BDDHookV
+-#
+---format=gnu --no-params
+-_vt$t8BDDHookV1ZPc
+-BDDHookV<char *> virtual table
+-BDDHookV<char *> virtual table
+-#
+---format=gnu --no-params
+-__ne__FPvRCQ211BDDFunction4VixB
+-operator!=(void *, BDDFunction::VixB const &)
+-operator!=
+-#
+---format=gnu --no-params
+-__eq__FPvRCQ211BDDFunction4VixB
+-operator==(void *, BDDFunction::VixB const &)
+-operator==
+-#
+---format=gnu --no-params
+-relativeId__CQ36T_phi210T_preserve8FPC_nextRCQ26T_phi210T_preserveRC10Parameters
+-T_phi2::T_preserve::FPC_next::relativeId(T_phi2::T_preserve const &, Parameters const &) const
+-T_phi2::T_preserve::FPC_next::relativeId
+-#
+---format=lucid --no-params
+-WS__FR7istream
+-WS(istream &)
+-WS
+-#
+---format=lucid --no-params
+-__aa__3fooFR3foo
+-foo::operator&&(foo &)
+-foo::operator&&
+-#
+---format=lucid --no-params
+-__aad__3fooFR3foo
+-foo::operator&=(foo &)
+-foo::operator&=
+-#
+---format=lucid --no-params
+-__ad__3fooFR3foo
+-foo::operator&(foo &)
+-foo::operator&
+-#
+---format=lucid --no-params
+-__adv__3fooFR3foo
+-foo::operator/=(foo &)
+-foo::operator/=
+-#
+---format=lucid --no-params
+-__adv__7complexF7complex
+-complex::operator/=(complex)
+-complex::operator/=
+-#
+---format=lucid --no-params
+-__aer__3fooFR3foo
+-foo::operator^=(foo &)
+-foo::operator^=
+-#
+---format=lucid --no-params
+-__als__3fooFR3foo
+-foo::operator<<=(foo &)
+-foo::operator<<=
+-#
+---format=lucid --no-params
+-__amd__3fooFR3foo
+-foo::operator%=(foo &)
+-foo::operator%=
+-#
+---format=lucid --no-params
+-__ami__3fooFR3foo
+-foo::operator-=(foo &)
+-foo::operator-=
+-#
+---format=lucid --no-params
+-__amu__3fooFR3foo
+-foo::operator*=(foo &)
+-foo::operator*=
+-#
+---format=lucid --no-params
+-__amu__7complexF7complex
+-complex::operator*=(complex)
+-complex::operator*=
+-#
+---format=lucid --no-params
+-__aor__3fooFR3foo
+-foo::operator|=(foo &)
+-foo::operator|=
+-#
+---format=lucid --no-params
+-__apl__3fooFR3foo
+-foo::operator+=(foo &)
+-foo::operator+=
+-#
+---format=lucid --no-params
+-__ars__3fooFR3foo
+-foo::operator>>=(foo &)
+-foo::operator>>=
+-#
+---format=lucid --no-params
+-__as__18istream_withassignFP9streambuf
+-istream_withassign::operator=(streambuf *)
+-istream_withassign::operator=
+-#
+---format=lucid --no-params
+-__as__18istream_withassignFR7istream
+-istream_withassign::operator=(istream &)
+-istream_withassign::operator=
+-#
+---format=lucid --no-params
+-__as__3fooFR3foo
+-foo::operator=(foo &)
+-foo::operator=
+-#
+---format=lucid --no-params
+-__as__3iosFR3ios
+-ios::operator=(ios &)
+-ios::operator=
+-#
+---format=lucid --no-params
+-__cl__3fooFR3foo
+-foo::operator()(foo &)
+-foo::operator()
+-#
+---format=lucid --no-params
+-__cm__3fooFR3foo
+-foo::operator, (foo &)
+-foo::operator, 
+-#
+---format=lucid --no-params
+-__co__3fooFv
+-foo::operator~(void)
+-foo::operator~
+-#
+---format=lucid --no-params
+-__ct__10istrstreamFPc
+-istrstream::istrstream(char *)
+-istrstream::istrstream
+-#
+---format=lucid --no-params
+-__ct__10istrstreamFPci
+-istrstream::istrstream(char *, int)
+-istrstream::istrstream
+-#
+---format=lucid --no-params
+-__ct__10ostrstreamFPciT2
+-ostrstream::ostrstream(char *, int, int)
+-ostrstream::ostrstream
+-#
+---format=lucid --no-params
+-__ct__10ostrstreamFv
+-ostrstream::ostrstream(void)
+-ostrstream::ostrstream
+-#
+---format=lucid --no-params
+-__ct__10smanip_intFPFR3iosi_R3iosi
+-smanip_int::smanip_int(ios &(*)(ios &, int), int)
+-smanip_int::smanip_int
+-#
+---format=lucid --no-params
+-__ct__10smanip_intFPFO3iosi_O3iosi
+-smanip_int::smanip_int(ios &&(*)(ios &&, int), int)
+-smanip_int::smanip_int
+-#
+---format=lucid --no-params
+-__ct__11fstreambaseFi
+-fstreambase::fstreambase(int)
+-fstreambase::fstreambase
+-#
+---format=lucid --no-params
+-__ct__11fstreambaseFiPcT1
+-fstreambase::fstreambase(int, char *, int)
+-fstreambase::fstreambase
+-#
+---format=lucid --no-params
+-__ct__11fstreambaseFv
+-fstreambase::fstreambase(void)
+-fstreambase::fstreambase
+-#
+---format=lucid --no-params
+-__ct__11smanip_longFPFR3iosl_R3iosl
+-smanip_long::smanip_long(ios &(*)(ios &, long), long)
+-smanip_long::smanip_long
+-#
+---format=lucid --no-params
+-__ct__11smanip_longFPFO3iosl_O3iosl
+-smanip_long::smanip_long(ios &&(*)(ios &&, long), long)
+-smanip_long::smanip_long
+-#
+---format=lucid --no-params
+-__ct__11stdiostreamFP4FILE
+-stdiostream::stdiostream(FILE *)
+-stdiostream::stdiostream
+-#
+---format=lucid --no-params
+-__ct__12strstreambufFPFl_PvPFPv_v
+-strstreambuf::strstreambuf(void *(*)(long), void (*)(void *))
+-strstreambuf::strstreambuf
+-#
+---format=lucid --no-params
+-__ct__12strstreambufFPUciT1
+-strstreambuf::strstreambuf(unsigned char *, int, unsigned char *)
+-strstreambuf::strstreambuf
+-#
+---format=lucid --no-params
+-__ct__12strstreambufFPciT1
+-strstreambuf::strstreambuf(char *, int, char *)
+-strstreambuf::strstreambuf
+-#
+---format=lucid --no-params
+-__ct__12strstreambufFi
+-strstreambuf::strstreambuf(int)
+-strstreambuf::strstreambuf
+-#
+---format=lucid --no-params
+-__ct__12strstreambufFv
+-strstreambuf::strstreambuf(void)
+-strstreambuf::strstreambuf
+-#
+---format=lucid --no-params
+-__ct__13strstreambaseFPciT1
+-strstreambase::strstreambase(char *, int, char *)
+-strstreambase::strstreambase
+-#
+---format=lucid --no-params
+-__ct__3fooFR3foo
+-foo::foo(foo &)
+-foo::foo
+-#
+---format=lucid --no-params
+-__ct__3fooFO3foo
+-foo::foo(foo &&)
+-foo::foo
+-#
+---format=lucid --no-params
+-__ct__3fooFi
+-foo::foo(int)
+-foo::foo
+-#
+---format=lucid --no-params
+-__ct__3fooFiN31
+-foo::foo(int, int, int, int)
+-foo::foo
+-#
+---format=lucid --no-params
+-__ct__3fooFiR3fooT1T2T1T2
+-foo::foo(int, foo &, int, foo &, int, foo &)
+-foo::foo
+-#
+---format=lucid --no-params
+-__ct__3fooFiO3fooT1T2T1T2
+-foo::foo(int, foo &&, int, foo &&, int, foo &&)
+-foo::foo
+-#
+---format=lucid --no-params
+-__ct__3iosFP9streambuf
+-ios::ios(streambuf *)
+-ios::ios
+-#
+---format=lucid --no-params
+-__ct__7filebufFiPcT1
+-filebuf::filebuf(int, char *, int)
+-filebuf::filebuf
+-#
+---format=lucid --no-params
+-__ct__7fstreamFiPcT1
+-fstream::fstream(int, char *, int)
+-fstream::fstream
+-#
+---format=lucid --no-params
+-__ct__7istreamFP9streambuf
+-istream::istream(streambuf *)
+-istream::istream
+-#
+---format=lucid --no-params
+-__ct__7istreamFP9streambufiP7ostream
+-istream::istream(streambuf *, int, ostream *)
+-istream::istream
+-#
+---format=lucid --no-params
+-__ct__7istreamFiPcT1
+-istream::istream(int, char *, int)
+-istream::istream
+-#
+---format=lucid --no-params
+-__ct__7istreamFiT1P7ostream
+-istream::istream(int, int, ostream *)
+-istream::istream
+-#
+---format=lucid --no-params
+-__ct__7ostreamFP9streambuf
+-ostream::ostream(streambuf *)
+-ostream::ostream
+-#
+---format=lucid --no-params
+-__ct__7ostreamFiPc
+-ostream::ostream(int, char *)
+-ostream::ostream
+-#
+---format=lucid --no-params
+-__ct__8ifstreamFiPcT1
+-ifstream::ifstream(int, char *, int)
+-ifstream::ifstream
+-#
+---format=lucid --no-params
+-__ct__Q23foo3barFv
+-foo::bar::bar(void)
+-foo::bar::bar
+-#
+---format=lucid --no-params
+-__ct__Q33foo3bar4bellFv
+-foo::bar::bell::bell(void)
+-foo::bar::bell::bell
+-#
+---format=lucid --no-params
+-__dl__3fooSFPv
+-foo::operator delete(void *) static
+-foo::operator delete
+-#
+---format=lucid --no-params
+-__dl__FPv
+-operator delete(void *)
+-operator delete
+-#
+---format=lucid --no-params
+-__dt__10istrstreamFv
+-istrstream::~istrstream(void)
+-istrstream::~istrstream
+-#
+---format=lucid --no-params
+-__dt__Q23foo3barFv
+-foo::bar::~bar(void)
+-foo::bar::~bar
+-#
+---format=lucid --no-params
+-__dt__Q33foo3bar4bellFv
+-foo::bar::bell::~bell(void)
+-foo::bar::bell::~bell
+-#
+---format=lucid --no-params
+-__dv__3fooFR3foo
+-foo::operator/(foo &)
+-foo::operator/
+-#
+---format=lucid --no-params
+-__dv__F7complexT1
+-operator/(complex, complex)
+-operator/
+-#
+---format=lucid --no-params
+-__eq__3fooFR3foo
+-foo::operator==(foo &)
+-foo::operator==
+-#
+---format=lucid --no-params
+-__er__3fooFR3foo
+-foo::operator^(foo &)
+-foo::operator^
+-#
+---format=lucid --no-params
+-__ge__3fooFR3foo
+-foo::operator>=(foo &)
+-foo::operator>=
+-#
+---format=lucid --no-params
+-__gt__3fooFR3foo
+-foo::operator>(foo &)
+-foo::operator>
+-#
+---format=lucid --no-params
+-__le__3fooFR3foo
+-foo::operator<=(foo &)
+-foo::operator<=
+-#
+---format=lucid --no-params
+-__ls__3fooFR3foo
+-foo::operator<<(foo &)
+-foo::operator<<
+-#
+---format=lucid --no-params
+-__ls__7ostreamFP9streambuf
+-ostream::operator<<(streambuf *)
+-ostream::operator<<
+-#
+---format=lucid --no-params
+-__ls__7ostreamFPFR3ios_R3ios
+-ostream::operator<<(ios &(*)(ios &))
+-ostream::operator<<
+-#
+---format=lucid --no-params
+-__ls__7ostreamFPv
+-ostream::operator<<(void *)
+-ostream::operator<<
+-#
+---format=lucid --no-params
+-__ls__7ostreamFUi
+-ostream::operator<<(unsigned int)
+-ostream::operator<<
+-#
+---format=lucid --no-params
+-__ls__7ostreamFUl
+-ostream::operator<<(unsigned long)
+-ostream::operator<<
+-#
+---format=lucid --no-params
+-__ls__7ostreamFd
+-ostream::operator<<(double)
+-ostream::operator<<
+-#
+---format=lucid --no-params
+-__ls__7ostreamFf
+-ostream::operator<<(float)
+-ostream::operator<<
+-#
+---format=lucid --no-params
+-__ls__7ostreamFi
+-ostream::operator<<(int)
+-ostream::operator<<
+-#
+---format=lucid --no-params
+-__ls__7ostreamFl
+-ostream::operator<<(long)
+-ostream::operator<<
+-#
+---format=lucid --no-params
+-__ls__FR7ostream7complex
+-operator<<(ostream &, complex)
+-operator<<
+-#
+---format=lucid --no-params
+-__lt__3fooFR3foo
+-foo::operator<(foo &)
+-foo::operator<
+-#
+---format=lucid --no-params
+-__md__3fooFR3foo
+-foo::operator%(foo &)
+-foo::operator%
+-#
+---format=lucid --no-params
+-__mi__3fooFR3foo
+-foo::operator-(foo &)
+-foo::operator-
+-#
+---format=lucid --no-params
+-__ml__3fooFR3foo
+-foo::operator*(foo &)
+-foo::operator*
+-#
+---format=lucid --no-params
+-__ml__F7complexT1
+-operator*(complex, complex)
+-operator*
+-#
+---format=lucid --no-params
+-__mm__3fooFi
+-foo::operator--(int)
+-foo::operator--
+-#
+---format=lucid --no-params
+-__ne__3fooFR3foo
+-foo::operator!=(foo &)
+-foo::operator!=
+-#
+---format=lucid --no-params
+-__nt__3fooFv
+-foo::operator!(void)
+-foo::operator!
+-#
+---format=lucid --no-params
+-__nw__3fooSFi
+-foo::operator new(int) static
+-foo::operator new
+-#
+---format=lucid --no-params
+-__nw__FUi
+-operator new(unsigned int)
+-operator new
+-#
+---format=lucid --no-params
+-__nw__FUiPv
+-operator new(unsigned int, void *)
+-operator new
+-#
+---format=lucid --no-params
+-__oo__3fooFR3foo
+-foo::operator||(foo &)
+-foo::operator||
+-#
+---format=lucid --no-params
+-__opPc__3fooFv
+-foo::operator char *(void)
+-foo::operator char *
+-#
+---format=lucid --no-params
+-__opi__3fooFv
+-foo::operator int(void)
+-foo::operator int
+-#
+---format=lucid --no-params
+-__or__3fooFR3foo
+-foo::operator|(foo &)
+-foo::operator|
+-#
+---format=lucid --no-params
+-__pl__3fooFR3foo
+-foo::operator+(foo &)
+-foo::operator+
+-#
+---format=lucid --no-params
+-__pp__3fooFi
+-foo::operator++(int)
+-foo::operator++
+-#
+---format=lucid --no-params
+-__pt__3fooFv
+-foo::operator->(void)
+-foo::operator->
+-#
+---format=lucid --no-params
+-__rm__3fooFR3foo
+-foo::operator->*(foo &)
+-foo::operator->*
+-#
+---format=lucid --no-params
+-__rs__3fooFR3foo
+-foo::operator>>(foo &)
+-foo::operator>>
+-#
+---format=lucid --no-params
+-__rs__7istreamFP9streambuf
+-istream::operator>>(streambuf *)
+-istream::operator>>
+-#
+---format=lucid --no-params
+-__rs__7istreamFPFR3ios_R3ios
+-istream::operator>>(ios &(*)(ios &))
+-istream::operator>>
+-#
+---format=lucid --no-params
+-__rs__7istreamFPFR7istream_R7istream
+-istream::operator>>(istream &(*)(istream &))
+-istream::operator>>
+-#
+---format=lucid --no-params
+-__rs__7istreamFPUc
+-istream::operator>>(unsigned char *)
+-istream::operator>>
+-#
+---format=lucid --no-params
+-__rs__7istreamFPc
+-istream::operator>>(char *)
+-istream::operator>>
+-#
+---format=lucid --no-params
+-__rs__7istreamFRUi
+-istream::operator>>(unsigned int &)
+-istream::operator>>
+-#
+---format=lucid --no-params
+-__rs__7istreamFRUl
+-istream::operator>>(unsigned long &)
+-istream::operator>>
+-#
+---format=lucid --no-params
+-__rs__7istreamFRUs
+-istream::operator>>(unsigned short &)
+-istream::operator>>
+-#
+---format=lucid --no-params
+-__rs__7istreamFRd
+-istream::operator>>(double &)
+-istream::operator>>
+-#
+---format=lucid --no-params
+-__rs__7istreamFRf
+-istream::operator>>(float &)
+-istream::operator>>
+-#
+---format=lucid --no-params
+-__rs__7istreamFRi
+-istream::operator>>(int &)
+-istream::operator>>
+-#
+---format=lucid --no-params
+-__rs__7istreamFRl
+-istream::operator>>(long &)
+-istream::operator>>
+-#
+---format=lucid --no-params
+-__rs__7istreamFRs
+-istream::operator>>(short &)
+-istream::operator>>
+-#
+---format=lucid --no-params
+-__rs__FR7istreamR7complex
+-operator>>(istream &, complex &)
+-operator>>
+-#
+---format=lucid --no-params
+-__vtbl__10istrstream
+-istrstream virtual table
+-istrstream virtual table
+-#
+---format=lucid --no-params
+-__vtbl__17ostream__iostream__19iostream_withassign
+-iostream_withassign::ostream__iostream virtual table
+-iostream_withassign::ostream__iostream virtual table
+-#
+---format=lucid --no-params
+-__vtbl__3ios
+-ios virtual table
+-ios virtual table
+-#
+---format=lucid --no-params
+-__vtbl__3ios__13strstreambase
+-strstreambase::ios virtual table
+-strstreambase::ios virtual table
+-#
+---format=lucid --no-params
+-abs__F7complex
+-abs(complex)
+-abs
+-#
+---format=lucid --no-params
+-allocate__9streambufFv
+-streambuf::allocate(void)
+-streambuf::allocate
+-#
+---format=lucid --no-params
+-attach__11fstreambaseFi
+-fstreambase::attach(int)
+-fstreambase::attach
+-#
+---format=lucid --no-params
+-bitalloc__3iosSFv
+-ios::bitalloc(void) static
+-ios::bitalloc
+-#
+---format=lucid --no-params
+-chr__FiT1
+-chr(int, int)
+-chr
+-#
+---format=lucid --no-params
+-complex_error__FR11c_exception
+-complex_error(c_exception &)
+-complex_error
+-#
+---format=lucid --no-params
+-complexfunc2__FPFPc_i
+-complexfunc2(int (*)(char *))
+-complexfunc2
+-#
+---format=lucid --no-params
+-complexfunc3__FPFPFPl_s_i
+-complexfunc3(int (*)(short (*)(long *)))
+-complexfunc3
+-#
+---format=lucid --no-params
+-complexfunc4__FPFPFPc_s_i
+-complexfunc4(int (*)(short (*)(char *)))
+-complexfunc4
+-#
+---format=lucid --no-params
+-complexfunc5__FPFPc_PFl_i
+-complexfunc5(int (*(*)(char *))(long))
+-complexfunc5
+-#
+---format=lucid --no-params
+-complexfunc6__FPFPi_PFl_i
+-complexfunc6(int (*(*)(int *))(long))
+-complexfunc6
+-#
+---format=lucid --no-params
+-complexfunc7__FPFPFPc_i_PFl_i
+-complexfunc7(int (*(*)(int (*)(char *)))(long))
+-complexfunc7
+-#
+---format=lucid --no-params
+-complicated_put__7ostreamFc
+-ostream::complicated_put(char)
+-ostream::complicated_put
+-#
+---format=lucid --no-params
+-conv10__FlPc
+-conv10(long, char *)
+-conv10
+-#
+---format=lucid --no-params
+-conv16__FUlPc
+-conv16(unsigned long, char *)
+-conv16
+-#
+---format=lucid --no-params
+-dec__FR3ios
+-dec(ios &)
+-dec
+-#
+---format=lucid --no-params
+-dec__Fli
+-dec(long, int)
+-dec
+-#
+---format=lucid --no-params
+-dofield__FP7ostreamPciT2T3
+-dofield(ostream *, char *, int, char *, int)
+-dofield
+-#
+---format=lucid --no-params
+-flags__3iosFl
+-ios::flags(long)
+-ios::flags
+-#
+---format=lucid --no-params
+-flags__3iosFv
+-ios::flags(void)
+-ios::flags
+-#
+---format=lucid --no-params
+-foo__FiN31
+-foo(int, int, int, int)
+-foo
+-#
+---format=lucid --no-params
+-foo__FiR3fooT1T2T1T2
+-foo(int, foo &, int, foo &, int, foo &)
+-foo
+-#
+---format=lucid --no-params
+-foo__FiO3fooT1T2T1T2
+-foo(int, foo &&, int, foo &&, int, foo &&)
+-foo
+-#
+---format=lucid --no-params
+-foo___3barFl
+-bar::foo_(long)
+-bar::foo_
+-#
+---format=lucid --no-params
+-get__7istreamFPcic
+-istream::get(char *, int, char)
+-istream::get
+-#
+---format=lucid --no-params
+-get__7istreamFR9streambufc
+-istream::get(streambuf &, char)
+-istream::get
+-#
+---format=lucid --no-params
+-get_complicated__7istreamFRUc
+-istream::get_complicated(unsigned char &)
+-istream::get_complicated
+-#
+---format=lucid --no-params
+-get_complicated__7istreamFRc
+-istream::get_complicated(char &)
+-istream::get_complicated
+-#
+---format=lucid --no-params
+-getline__7istreamFPUcic
+-istream::getline(unsigned char *, int, char)
+-istream::getline
+-#
+---format=lucid --no-params
+-getline__7istreamFPcic
+-istream::getline(char *, int, char)
+-istream::getline
+-#
+---format=lucid --no-params
+-ignore__7istreamFiT1
+-istream::ignore(int, int)
+-istream::ignore
+-#
+---format=lucid --no-params
+-init__12strstreambufFPciT1
+-strstreambuf::init(char *, int, char *)
+-strstreambuf::init
+-#
+---format=lucid --no-params
+-init__3iosFP9streambuf
+-ios::init(streambuf *)
+-ios::init
+-#
+---format=lucid --no-params
+-initcount__13Iostream_init
+-Iostream_init::initcount
+-Iostream_init::initcount
+-#
+---format=lucid --no-params
+-ipfx__7istreamFi
+-istream::ipfx(int)
+-istream::ipfx
+-#
+---format=lucid --no-params
+-ls_complicated__7ostreamFUc
+-ostream::ls_complicated(unsigned char)
+-ostream::ls_complicated
+-#
+---format=lucid --no-params
+-ls_complicated__7ostreamFc
+-ostream::ls_complicated(char)
+-ostream::ls_complicated
+-#
+---format=lucid --no-params
+-overload1arg__FSc
+-overload1arg(signed char)
+-overload1arg
+-#
+---format=lucid --no-params
+-overload1arg__FUc
+-overload1arg(unsigned char)
+-overload1arg
+-#
+---format=lucid --no-params
+-overload1arg__FUi
+-overload1arg(unsigned int)
+-overload1arg
+-#
+---format=lucid --no-params
+-overload1arg__FUl
+-overload1arg(unsigned long)
+-overload1arg
+-#
+---format=lucid --no-params
+-overload1arg__FUs
+-overload1arg(unsigned short)
+-overload1arg
+-#
+---format=lucid --no-params
+-overload1arg__Fc
+-overload1arg(char)
+-overload1arg
+-#
+---format=lucid --no-params
+-overload1arg__Fd
+-overload1arg(double)
+-overload1arg
+-#
+---format=lucid --no-params
+-overload1arg__Ff
+-overload1arg(float)
+-overload1arg
+-#
+---format=lucid --no-params
+-overload1arg__Fi
+-overload1arg(int)
+-overload1arg
+-#
+---format=lucid --no-params
+-overload1arg__Fl
+-overload1arg(long)
+-overload1arg
+-#
+---format=lucid --no-params
+-overload1arg__Fs
+-overload1arg(short)
+-overload1arg
+-#
+---format=lucid --no-params
+-overload1arg__Fv
+-overload1arg(void)
+-overload1arg
+-#
+---format=lucid --no-params
+-overloadargs__FiN21
+-overloadargs(int, int, int)
+-overloadargs
+-#
+---format=lucid --no-params
+-overloadargs__FiN31
+-overloadargs(int, int, int, int)
+-overloadargs
+-#
+---format=lucid --no-params
+-overloadargs__FiN41
+-overloadargs(int, int, int, int, int)
+-overloadargs
+-#
+---format=lucid --no-params
+-overloadargs__FiN51
+-overloadargs(int, int, int, int, int, int)
+-overloadargs
+-#
+---format=lucid --no-params
+-overloadargs__FiN61
+-overloadargs(int, int, int, int, int, int, int)
+-overloadargs
+-#
+---format=lucid --no-params
+-overloadargs__FiN71
+-overloadargs(int, int, int, int, int, int, int, int)
+-overloadargs
+-#
+---format=lucid --no-params
+-overloadargs__FiN81
+-overloadargs(int, int, int, int, int, int, int, int, int)
+-overloadargs
+-#
+---format=lucid --no-params
+-overloadargs__FiN91
+-overloadargs(int, int, int, int, int, int, int, int, int, int)
+-overloadargs
+-#
+---format=lucid --no-params
+-overloadargs__FiN91N11
+-overloadargs(int, int, int, int, int, int, int, int, int, int, int)
+-overloadargs
+-#
+---format=lucid --no-params
+-overloadargs__FiT1
+-overloadargs(int, int)
+-overloadargs
+-#
+---format=lucid --no-params
+-polar__FdT1
+-polar(double, double)
+-polar
+-#
+---format=lucid --no-params
+-pow__F7complexT1
+-pow(complex, complex)
+-pow
+-#
+---format=lucid --no-params
+-pow__F7complexd
+-pow(complex, double)
+-pow
+-#
+---format=lucid --no-params
+-pow__F7complexi
+-pow(complex, int)
+-pow
+-#
+---format=lucid --no-params
+-pow__Fd7complex
+-pow(double, complex)
+-pow
+-#
+---format=lucid --no-params
+-pstart__FPciT2
+-pstart(char *, int, int)
+-pstart
+-#
+---format=lucid --no-params
+-put__7ostreamFc
+-ostream::put(char)
+-ostream::put
+-#
+---format=lucid --no-params
+-read__7istreamFPci
+-istream::read(char *, int)
+-istream::read
+-#
+---format=lucid --no-params
+-resetiosflags__FR3iosl
+-resetiosflags(ios &, long)
+-resetiosflags
+-#
+---format=lucid --no-params
+-restore_errno__FRi
+-restore_errno(int &)
+-restore_errno
+-#
+---format=lucid --no-params
+-rs_complicated__7istreamFRUc
+-istream::rs_complicated(unsigned char &)
+-istream::rs_complicated
+-#
+---format=lucid --no-params
+-rs_complicated__7istreamFRc
+-istream::rs_complicated(char &)
+-istream::rs_complicated
+-#
+---format=lucid --no-params
+-seekg__7istreamFl8seek_dir
+-istream::seekg(long, seek_dir)
+-istream::seekg
+-#
+---format=lucid --no-params
+-seekoff__12strstreambufFl8seek_diri
+-strstreambuf::seekoff(long, seek_dir, int)
+-strstreambuf::seekoff
+-#
+---format=lucid --no-params
+-seekoff__9streambufFlQ2_3ios12ios_seek_diri
+-streambuf::seekoff(long, ios::ios_seek_dir, int)
+-streambuf::seekoff
+-#
+---format=lucid --no-params
+-seekpos__9streambufFli
+-streambuf::seekpos(long, int)
+-streambuf::seekpos
+-#
+---format=lucid --no-params
+-set_new_handler__FPFv_v
+-set_new_handler(void (*)(void))
+-set_new_handler
+-#
+---format=lucid --no-params
+-setb__9streambufFPcT1i
+-streambuf::setb(char *, char *, int)
+-streambuf::setb
+-#
+---format=lucid --no-params
+-setb__FR3iosi
+-setb(ios &, int)
+-setb
+-#
+---format=lucid --no-params
+-setbuf__11fstreambaseFPci
+-fstreambase::setbuf(char *, int)
+-fstreambase::setbuf
+-#
+---format=lucid --no-params
+-setbuf__9streambufFPUci
+-streambuf::setbuf(unsigned char *, int)
+-streambuf::setbuf
+-#
+---format=lucid --no-params
+-setbuf__9streambufFPciT2
+-streambuf::setbuf(char *, int, int)
+-streambuf::setbuf
+-#
+---format=lucid --no-params
+-setf__3iosFlT1
+-ios::setf(long, long)
+-ios::setf
+-#
+---format=lucid --no-params
+-setfill__FR3iosi
+-setfill(ios &, int)
+-setfill
+-#
+---format=lucid --no-params
+-setg__9streambufFPcN21
+-streambuf::setg(char *, char *, char *)
+-streambuf::setg
+-#
+---format=lucid --no-params
+-setp__9streambufFPcT1
+-streambuf::setp(char *, char *)
+-streambuf::setp
+-#
+---format=lucid --no-params
+-tie__3iosFP7ostream
+-ios::tie(ostream *)
+-ios::tie
+-#
+---format=lucid --no-params
+-uconv10__FUlPc
+-uconv10(unsigned long, char *)
+-uconv10
+-#
+---format=lucid --no-params
+-xget__7istreamFPc
+-istream::xget(char *)
+-istream::xget
+-#
+---format=lucid --no-params
+-xsgetn__9streambufFPci
+-streambuf::xsgetn(char *, int)
+-streambuf::xsgetn
+-#
+---format=arm --no-params
+-__dt__21T5__pt__11_PFiPPdPv_iFv
+-T5<int (*)(int, double **, void *)>::~T5(void)
+-T5<int (*)(int, double **, void *)>::~T5
+-#
+---format=arm --no-params
+-__ct__1cFi
+-c::c(int)
+-c::c
+-#
+---format=arm --no-params
+-__dt__11T5__pt__2_iFv
+-T5<int>::~T5(void)
+-T5<int>::~T5
+-#
+---format=arm --no-params
+-__dt__11T5__pt__2_cFv
+-T5<char>::~T5(void)
+-T5<char>::~T5
+-#
+---format=arm --no-params
+-__ct__2T2Fi
+-T2::T2(int)
+-T2::T2
+-#
+---format=arm --no-params
+-__dt__2T1Fv
+-T1::~T1(void)
+-T1::~T1
+-#
+---format=arm --no-params
+-__dt__12T5__pt__3_1xFv
+-T5<x>::~T5(void)
+-T5<x>::~T5
+-#
+---format=arm --no-params
+-__dt__17T5__pt__8_PFcPv_iFv
+-T5<int (*)(char, void *)>::~T5(void)
+-T5<int (*)(char, void *)>::~T5
+-#
+---format=arm --no-params
+-__ct__21T5__pt__11_PFiPPdPv_iFi
+-T5<int (*)(int, double **, void *)>::T5(int)
+-T5<int (*)(int, double **, void *)>::T5
+-#
+---format=arm --no-params
+-__amd__FR2T2i
+-operator%=(T2 &, int)
+-operator%=
+-#
+---format=arm --no-params
+-__adv__FR2T2i
+-operator/=(T2 &, int)
+-operator/=
+-#
+---format=arm --no-params
+-__amu__FR2T2i
+-operator*=(T2 &, int)
+-operator*=
+-#
+---format=arm --no-params
+-__ami__FR2T2i
+-operator-=(T2 &, int)
+-operator-=
+-#
+---format=arm --no-params
+-__apl__FR2T2i
+-operator+=(T2 &, int)
+-operator+=
+-#
+---format=arm --no-params
+-__nw__2T1SFUi
+-T1::operator new(unsigned int) static
+-T1::operator new
+-#
+---format=arm --no-params
+-__dl__2T1SFPv
+-T1::operator delete(void *) static
+-T1::operator delete
+-#
+---format=arm --no-params
+-put__2T7SFi
+-T7::put(int) static
+-T7::put
+-#
+---format=arm --no-params
+-__dl__12T5__pt__3_1xSFPv
+-T5<x>::operator delete(void *) static
+-T5<x>::operator delete
+-#
+---format=arm --no-params
+-h__FUc
+-h(unsigned char)
+-h
+-#
+---format=arm --no-params
+-f__Fic
+-f(int, char)
+-f
+-#
+---format=arm --no-params
+-h__FUi
+-h(unsigned int)
+-h
+-#
+---format=arm --no-params
+-h__Fci
+-h(char, int)
+-h
+-#
+---format=arm --no-params
+-h__FUl
+-h(unsigned long)
+-h
+-#
+---format=arm --no-params
+-h__Fcl
+-h(char, long)
+-h
+-#
+---format=arm --no-params
+-h__FUs
+-h(unsigned short)
+-h
+-#
+---format=arm --no-params
+-h__Fcs
+-h(char, short)
+-h
+-#
+---format=arm --no-params
+-X__12T5__pt__3_1x
+-T5<x>::X
+-T5<x>::X
+-#
+---format=arm --no-params
+-__ct__11T5__pt__2_iFi
+-T5<int>::T5(int)
+-T5<int>::T5
+-#
+---format=arm --no-params
+-__ct__11T5__pt__2_cFi
+-T5<char>::T5(int)
+-T5<char>::T5
+-#
+---format=arm --no-params
+-h__FcT1
+-h(char, char)
+-h
+-#
+---format=arm --no-params
+-f__Ficd
+-f(int, char, double)
+-f
+-#
+---format=arm --no-params
+-__dl__17T5__pt__8_PFcPv_iSFPv
+-T5<int (*)(char, void *)>::operator delete(void *) static
+-T5<int (*)(char, void *)>::operator delete
+-#
+---format=arm --no-params
+-X__17T5__pt__8_PFcPv_i
+-T5<int (*)(char, void *)>::X
+-T5<int (*)(char, void *)>::X
+-#
+---format=arm --no-params
+-__ct__12T5__pt__3_1xFi
+-T5<x>::T5(int)
+-T5<x>::T5
+-#
+---format=arm --no-params
+-__dl__21T5__pt__11_PFiPPdPv_iSFPv
+-T5<int (*)(int, double **, void *)>::operator delete(void *) static
+-T5<int (*)(int, double **, void *)>::operator delete
+-#
+---format=arm --no-params
+-__std__foo
+-global destructors keyed to foo
+-global destructors keyed to foo
+-#
+---format=arm --no-params
+-__sti__bar
+-global constructors keyed to bar
+-global constructors keyed to bar
+-#
+---format=arm --no-params
+-f__FicdPcPFci_v
+-f(int, char, double, char *, void (*)(char, int))
+-f
+-#
+---format=arm --no-params
+-f__FicdPcPFic_v
+-f(int, char, double, char *, void (*)(int, char))
+-f
+-#
+---format=arm --no-params
+-get__2T7SFv
+-T7::get(void) static
+-T7::get
+-#
+---format=arm --no-params
+-X__21T5__pt__11_PFiPPdPv_i
+-T5<int (*)(int, double **, void *)>::X
+-T5<int (*)(int, double **, void *)>::X
+-#
+---format=arm --no-params
+-__dl__11T5__pt__2_iSFPv
+-T5<int>::operator delete(void *) static
+-T5<int>::operator delete
+-#
+---format=arm --no-params
+-__dl__11T5__pt__2_cSFPv
+-T5<char>::operator delete(void *) static
+-T5<char>::operator delete
+-#
+---format=arm --no-params
+-h__Fc
+-h(char)
+-h
+-#
+---format=arm --no-params
+-h__Fd
+-h(double)
+-h
+-#
+---format=arm --no-params
+-h__Ff
+-h(float)
+-h
+-#
+---format=arm --no-params
+-h__Fi
+-h(int)
+-h
+-#
+---format=arm --no-params
+-f__Fi
+-f(int)
+-f
+-#
+---format=arm --no-params
+-h__Fl
+-h(long)
+-h
+-#
+---format=arm --no-params
+-h__Fs
+-h(short)
+-h
+-#
+---format=arm --no-params
+-X__11T5__pt__2_c
+-T5<char>::X
+-T5<char>::X
+-#
+---format=arm --no-params
+-X__11T5__pt__2_i
+-T5<int>::X
+-T5<int>::X
+-#
+---format=arm --no-params
+-__ct__17T5__pt__8_PFcPv_iFi
+-T5<int (*)(char, void *)>::T5(int)
+-T5<int (*)(char, void *)>::T5
+-#
+---format=arm --no-params
+-f__FicdPc
+-f(int, char, double, char *)
+-f
+-#
+---format=arm --no-params
+-__nw__FUi
+-operator new(unsigned int)
+-operator new
+-#
+---format=arm --no-params
+-__ct__Q3_2T11a1bSFi
+-T1::a::b::b(int) static
+-T1::a::b::b
+-#
+---format=arm --no-params
+-__dt__Q3_2T11a1bSFi
+-T1::a::b::~b(int) static
+-T1::a::b::~b
+-#
+---format=arm --no-params
+-put__Q3_2T11a1bSFi
+-T1::a::b::put(int) static
+-T1::a::b::put
+-#
+---format=arm --no-params
+-get__Q2_2T11aSFv
+-T1::a::get(void) static
+-T1::a::get
+-#
+---format=arm --no-params
+-put__2T1SFi
+-T1::put(int) static
+-T1::put
+-#
+---format=arm --no-params
+-put__Q5_2T11a1b1c1dSFi
+-T1::a::b::c::d::put(int) static
+-T1::a::b::c::d::put
+-#
+---format=arm --no-params
+-get__Q4_2T11a1b1cSFv
+-T1::a::b::c::get(void) static
+-T1::a::b::c::get
+-#
+---format=arm --no-params
+-put__Q2_2T11aSFi
+-T1::a::put(int) static
+-T1::a::put
+-#
+---format=arm --no-params
+-put__Q4_2T11a1b1cSFi
+-T1::a::b::c::put(int) static
+-T1::a::b::c::put
+-#
+---format=arm --no-params
+-get__Q3_2T11a1bSFv
+-T1::a::b::get(void) static
+-T1::a::b::get
+-#
+---format=arm --no-params
+-get__2T1SFv
+-T1::get(void) static
+-T1::get
+-#
+---format=arm --no-params
+-get__Q5_2T11a1b1c1dSFv
+-T1::a::b::c::d::get(void) static
+-T1::a::b::c::d::get
+-#
+---format=arm --no-params
+-__dt__11T1__pt__2_cFv
+-T1<char>::~T1(void)
+-T1<char>::~T1
+-#
+---format=arm --no-params
+-__dt__12T1__pt__3_1tFv
+-T1<t>::~T1(void)
+-T1<t>::~T1
+-#
+---format=arm --no-params
+-__dl__12T1__pt__3_1tSFPv
+-T1<t>::operator delete(void *) static
+-T1<t>::operator delete
+-#
+---format=arm --no-params
+-__ct__11T1__pt__2_cFi
+-T1<char>::T1(int)
+-T1<char>::T1
+-#
+---format=arm --no-params
+-__ct__11T1__pt__2_cFv
+-T1<char>::T1(void)
+-T1<char>::T1
+-#
+---format=arm --no-params
+-__ct__12T1__pt__3_1tFi
+-T1<t>::T1(int)
+-T1<t>::T1
+-#
+---format=arm --no-params
+-__ct__12T1__pt__3_1tFv
+-T1<t>::T1(void)
+-T1<t>::T1
+-#
+---format=arm --no-params
+-__dl__11T1__pt__2_cSFPv
+-T1<char>::operator delete(void *) static
+-T1<char>::operator delete
+-#
+---format=arm --no-params
+-bar__3fooFPv
+-foo::bar(void *)
+-foo::bar
+-#
+---format=arm --no-params
+-bar__3fooCFPv
+-foo::bar(void *) const
+-foo::bar
+-#
+---format=arm --no-params
+-__eq__3fooFR3foo
+-foo::operator==(foo &)
+-foo::operator==
+-#
+---format=arm --no-params
+-__eq__3fooCFR3foo
+-foo::operator==(foo &) const
+-foo::operator==
+-#
+---format=arm --no-params
+-elem__15vector__pt__2_dFi
+-vector<double>::elem(int)
+-vector<double>::elem
+-#
+---format=arm --no-params
+-elem__15vector__pt__2_iFi
+-vector<int>::elem(int)
+-vector<int>::elem
+-#
+---format=arm --no-params
+-__ct__15vector__pt__2_dFi
+-vector<double>::vector(int)
+-vector<double>::vector
+-#
+---format=arm --no-params
+-__ct__15vector__pt__2_iFi
+-vector<int>::vector(int)
+-vector<int>::vector
+-#
+---format=arm --no-params
+-__ct__25DListNode__pt__9_R6RLabelFR6RLabelP25DListNode__pt__9_R6RLabelT2
+-DListNode<RLabel &>::DListNode(RLabel &, DListNode<RLabel &> *, DListNode<RLabel &> *)
+-DListNode<RLabel &>::DListNode
+-#
+---format=arm --no-params
+-__ct__25DListNode__pt__9_O6RLabelFO6RLabelP25DListNode__pt__9_O6RLabelT2
+-DListNode<RLabel &&>::DListNode(RLabel &&, DListNode<RLabel &&> *, DListNode<RLabel &&> *)
+-DListNode<RLabel &&>::DListNode
+-#
+---format=arm --no-params
+-bar__3fooFiT16FooBar
+-foo::bar(int, int, FooBar)
+-foo::bar
+-#
+---format=arm --no-params
+-bar__3fooFPiN51PdN37PcN211T1iN215
+-foo::bar(int *, int *, int *, int *, int *, int *, double *, double *, double *, double *, char *, char *, char *, int *, int, int, int)
+-foo::bar
+-#
+---format=hp --no-params
+-__amd__FR2T2i
+-operator%=(T2 &, int)
+-operator%=
+-#
+---format=hp --no-params
+-__adv__FR2T2i
+-operator/=(T2 &, int)
+-operator/=
+-#
+---format=hp --no-params
+-__amu__FR2T2i
+-operator*=(T2 &, int)
+-operator*=
+-#
+---format=hp --no-params
+-__ami__FR2T2i
+-operator-=(T2 &, int)
+-operator-=
+-#
+---format=hp --no-params
+-__apl__FR2T2i
+-operator+=(T2 &, int)
+-operator+=
+-#
+---format=hp --no-params
+-__nw__2T1SFUi
+-T1::operator new(unsigned int) static
+-T1::operator new
+-#
+---format=hp --no-params
+-__dl__2T1SFPv
+-T1::operator delete(void *) static
+-T1::operator delete
+-#
+---format=hp --no-params
+-put__2T7SFi
+-T7::put(int) static
+-T7::put
+-#
+---format=hp --no-params
+-h__FUc
+-h(unsigned char)
+-h
+-#
+---format=hp --no-params
+-f__Fic
+-f(int, char)
+-f
+ #
+---format=hp --no-params
+-h__FUi
+-h(unsigned int)
+-h
+-#
+---format=hp --no-params
+-h__Fci
+-h(char, int)
+-h
+-#
+---format=hp --no-params
+-h__FUl
+-h(unsigned long)
+-h
+-#
+---format=hp --no-params
+-h__Fcl
+-h(char, long)
+-h
+-#
+---format=hp --no-params
+-h__FUs
+-h(unsigned short)
+-h
+-#
+---format=hp --no-params
+-h__Fcs
+-h(char, short)
+-h
+-#
+---format=hp --no-params
+-h__FcT1
+-h(char, char)
+-h
+-#
+---format=hp --no-params
+-f__Ficd
+-f(int, char, double)
+-f
+-#
+---format=hp --no-params
+-f__FicdPcPFci_v
+-f(int, char, double, char *, void (*)(char, int))
+-f
+-#
+---format=hp --no-params
+-f__FicdPcPFic_v
+-f(int, char, double, char *, void (*)(int, char))
+-f
+-#
+---format=hp --no-params
+-get__2T7SFv
+-T7::get(void) static
+-T7::get
+-#
+---format=hp --no-params
+-h__Fc
+-h(char)
+-h
+-#
+---format=hp --no-params
+-h__Fd
+-h(double)
+-h
+-#
+---format=hp --no-params
+-h__Ff
+-h(float)
+-h
+-#
+---format=hp --no-params
+-h__Fi
+-h(int)
+-h
+-#
+---format=hp --no-params
+-f__Fi
+-f(int)
+-f
+-#
+---format=hp --no-params
+-h__Fl
+-h(long)
+-h
++# Tests integer overflow problem PR70492
++
++__vt_90000000000cafebabe
++__vt_90000000000cafebabe
+ #
+---format=hp --no-params
+-h__Fs
+-h(short)
+-h
++# Tests write access violation PR70498
++
++_Z80800000000000000000000
++_Z80800000000000000000000
+ #
+---format=hp --no-params
+-f__FicdPc
+-f(int, char, double, char *)
+-f
++# Tests write access violation PR70926
++
++0__Ot2m02R5T0000500000
++0__Ot2m02R5T0000500000
+ #
+---format=hp --no-params
+-__nw__FUi
+-operator new(unsigned int)
+-operator new
+-#
+---format=hp --no-params
+-__ct__Q3_2T11a1bSFi
+-T1::a::b::b(int) static
+-T1::a::b::b
+-#
+---format=hp --no-params
+-__dt__Q3_2T11a1bSFi
+-T1::a::b::~b(int) static
+-T1::a::b::~b
+-#
+---format=hp --no-params
+-put__Q3_2T11a1bSFi
+-T1::a::b::put(int) static
+-T1::a::b::put
+-#
+---format=hp --no-params
+-get__Q2_2T11aSFv
+-T1::a::get(void) static
+-T1::a::get
+-#
+---format=hp --no-params
+-put__2T1SFi
+-T1::put(int) static
+-T1::put
+-#
+---format=hp --no-params
+-put__Q5_2T11a1b1c1dSFi
+-T1::a::b::c::d::put(int) static
+-T1::a::b::c::d::put
+-#
+---format=hp --no-params
+-get__Q4_2T11a1b1cSFv
+-T1::a::b::c::get(void) static
+-T1::a::b::c::get
+-#
+---format=hp --no-params
+-put__Q2_2T11aSFi
+-T1::a::put(int) static
+-T1::a::put
+-#
+---format=hp --no-params
+-put__Q4_2T11a1b1cSFi
+-T1::a::b::c::put(int) static
+-T1::a::b::c::put
+-#
+---format=hp --no-params
+-get__Q3_2T11a1bSFv
+-T1::a::b::get(void) static
+-T1::a::b::get
+-#
+---format=hp --no-params
+-get__2T1SFv
+-T1::get(void) static
+-T1::get
+-#
+---format=hp --no-params
+-get__Q5_2T11a1b1c1dSFv
+-T1::a::b::c::d::get(void) static
+-T1::a::b::c::d::get
+-#
+---format=hp --no-params
+-bar__3fooFPv
+-foo::bar(void *)
+-foo::bar
+-#
+---format=hp --no-params
+-bar__3fooCFPv
+-foo::bar(void *) const
+-foo::bar
+-#
+---format=hp --no-params
+-__eq__3fooFR3foo
+-foo::operator==(foo &)
+-foo::operator==
+-#
+---format=hp --no-params
+-__eq__3fooCFR3foo
+-foo::operator==(foo &) const
+-foo::operator==
+-#
+---format=hp --no-params
+-bar__3fooFiT16FooBar
+-foo::bar(int, int, FooBar)
+-foo::bar
+-#
+---format=hp --no-params
+-bar__3fooFPiN51PdN37PcN211T1iN215
+-foo::bar(int *, int *, int *, int *, int *, int *, double *, double *, double *, double *, char *, char *, char *, int *, int, int, int)
+-foo::bar
+-#
+---format=hp --no-params
+-__dt__2T5XTPFiPPdPv_i__Fv
+-T5<int (*)(int, double **, void *)>::~T5(void)
+-T5<int (*)(int, double **, void *)>::~T5
+-#
+---format=hp --no-params
+-__ct__1cFi
+-c::c(int)
+-c::c
+-#
+---format=hp --no-params
+-__dt__2T5XTi__Fv
+-T5<int>::~T5(void)
+-T5<int>::~T5
+-#
+---format=hp --no-params
+-__dt__2T5XTc__Fv
+-T5<char>::~T5(void)
+-T5<char>::~T5
+-#
+---format=hp --no-params
+-__ct__2T2Fi
+-T2::T2(int)
+-T2::T2
+-#
+---format=hp --no-params
+-__dt__2T1Fv
+-T1::~T1(void)
+-T1::~T1
+-#
+---format=hp --no-params
+-__dt__2T5XT1x__Fv
+-T5<x>::~T5(void)
+-T5<x>::~T5
+-#
+---format=hp --no-params
+-__dt__2T5XTPFcPv_i__Fv
+-T5<int (*)(char, void *)>::~T5(void)
+-T5<int (*)(char, void *)>::~T5
+-#
+---format=hp --no-params
+-__ct__2T5XTPFiPPdPv_i__Fi
+-T5<int (*)(int, double **, void *)>::T5(int)
+-T5<int (*)(int, double **, void *)>::T5
+-#
+---format=hp --no-params
+-__dl__2T5XT1x__SFPv
+-T5<x>::operator delete(void *) static
+-T5<x>::operator delete
+-#
+---format=hp --no-params
+-X__2T5XT1x
+-T5<x>::X
+-T5<x>::X
+-#
+---format=hp --no-params
+-__ct__2T5XTi__Fi
+-T5<int>::T5(int)
+-T5<int>::T5
+-#
+---format=hp --no-params
+-__ct__2T5XTc__Fi
+-T5<char>::T5(int)
+-T5<char>::T5
+-#
+---format=hp --no-params
+-__dl__2T5XTPFcPv_i__SFPv
+-T5<int (*)(char, void *)>::operator delete(void *) static
+-T5<int (*)(char, void *)>::operator delete
+-#
+---format=hp --no-params
+-X__2T5XTPFcPv_i
+-T5<int (*)(char, void *)>::X
+-T5<int (*)(char, void *)>::X
+-#
+---format=hp --no-params
+-__ct__2T5XT1x__Fi
+-T5<x>::T5(int)
+-T5<x>::T5
+-#
+---format=hp --no-params
+-__dl__2T5XTPFiPPdPv_i__SFPv
+-T5<int (*)(int, double **, void *)>::operator delete(void *) static
+-T5<int (*)(int, double **, void *)>::operator delete
+-#
+---format=hp --no-params
+-X__2T5XTPFiPPdPv_i
+-T5<int (*)(int, double **, void *)>::X
+-T5<int (*)(int, double **, void *)>::X
+-#
+---format=hp --no-params
+-__dl__2T5XTi__SFPv
+-T5<int>::operator delete(void *) static
+-T5<int>::operator delete
+-#
+---format=hp --no-params
+-__dl__2T5XTc__SFPv
+-T5<char>::operator delete(void *) static
+-T5<char>::operator delete
+-#
+---format=hp --no-params
+-X__2T5XTc
+-T5<char>::X
+-T5<char>::X
+-#
+---format=hp --no-params
+-X__2T5XTi
+-T5<int>::X
+-T5<int>::X
+-#
+---format=hp --no-params
+-__ct__2T5XTPFcPv_i__Fi
+-T5<int (*)(char, void *)>::T5(int)
+-T5<int (*)(char, void *)>::T5
+-#
+---format=hp --no-params
+-__dt__2T1XTc__Fv
+-T1<char>::~T1(void)
+-T1<char>::~T1
+-#
+---format=hp --no-params
+-__dt__2T1XT1t__Fv
+-T1<t>::~T1(void)
+-T1<t>::~T1
+-#
+---format=hp --no-params
+-__dl__2T1XT1t__SFPv
+-T1<t>::operator delete(void *) static
+-T1<t>::operator delete
+-#
+---format=hp --no-params
+-__ct__2T1XTc__Fi
+-T1<char>::T1(int)
+-T1<char>::T1
+-#
+---format=hp --no-params
+-__ct__2T1XTc__Fv
+-T1<char>::T1(void)
+-T1<char>::T1
+-#
+---format=hp --no-params
+-__ct__2T1XT1t__Fi
+-T1<t>::T1(int)
+-T1<t>::T1
+-#
+---format=hp --no-params
+-__ct__2T1XT1t__Fv
+-T1<t>::T1(void)
+-T1<t>::T1
+-#
+---format=hp --no-params
+-__dl__2T1XTc__SFPv
+-T1<char>::operator delete(void *) static
+-T1<char>::operator delete
+-#
+---format=hp --no-params
+-elem__6vectorXTd__Fi
+-vector<double>::elem(int)
+-vector<double>::elem
+-#
+---format=hp --no-params
+-elem__6vectorXTi__Fi
+-vector<int>::elem(int)
+-vector<int>::elem
+-#
+---format=hp --no-params
+-__ct__6vectorXTd__Fi
+-vector<double>::vector(int)
+-vector<double>::vector
+-#
+---format=hp --no-params
+-__ct__6vectorXTi__Fi
+-vector<int>::vector(int)
+-vector<int>::vector
+-#
+---format=hp --no-params
+-__ct__9DListNodeXTR6RLabel__FR6RLabelP9DListNodeXTR6RLabel_T2
+-DListNode<RLabel &>::DListNode(RLabel &, DListNode<RLabel &> *, DListNode<RLabel &> *)
+-DListNode<RLabel &>::DListNode
+-#
+---format=hp --no-params
+-__ct__9DListNodeXTO6RLabel__FO6RLabelP9DListNodeXTO6RLabel_T2
+-DListNode<RLabel &&>::DListNode(RLabel &&, DListNode<RLabel &&> *, DListNode<RLabel &&> *)
+-DListNode<RLabel &&>::DListNode
+-#
+---format=hp --no-params
+-elem__6vectorXTiUP34__Fi
+-vector<int,34U>::elem(int)
+-vector<int,34U>::elem
+-#
+---format=hp --no-params
+-elem__6vectorXUP2701Td__Fi
+-vector<2701U,double>::elem(int)
+-vector<2701U,double>::elem
+-#
+---format=hp --no-params
+-elem__6vectorXTiSP334__Fi
+-vector<int,334>::elem(int)
+-vector<int,334>::elem
+-#
+---format=hp --no-params
+-elem__6vectorXTiSN67__Fi
+-vector<int,-67>::elem(int)
+-vector<int,-67>::elem
+-#
+---format=hp --no-params
+-elem__6vectorXTiSM__SCFPPd
+-vector<int,-2147483648>::elem(double **) static const
+-vector<int,-2147483648>::elem
+-#
+---format=hp --no-params
+-elem__6vectorXTiSN67UP4000TRs__Fi
+-vector<int,-67,4000U,short &>::elem(int)
+-vector<int,-67,4000U,short &>::elem
+-#
+---format=hp --no-params
+-elem__6vectorXTiSN67UP4000TOs__Fi
+-vector<int,-67,4000U,short &&>::elem(int)
+-vector<int,-67,4000U,short &&>::elem
+-#
+---format=hp --no-params
+-elem__6vectorXTiSN67TRdTFPv_i__Fi
+-vector<int,-67,double &,int (void *)>::elem(int)
+-vector<int,-67,double &,int (void *)>::elem
+-#
+---format=hp --no-params
+-elem__6vectorXTiSN67TOdTFPv_i__Fi
+-vector<int,-67,double &&,int (void *)>::elem(int)
+-vector<int,-67,double &&,int (void *)>::elem
+-#
+---format=hp --no-params
+-X__6vectorXTiSN67TdTPvUP5TRs
+-vector<int,-67,double,void *,5U,short &>::X
+-vector<int,-67,double,void *,5U,short &>::X
+-#
+---format=hp --no-params
+-X__6vectorXTiSN67TdTPvUP5TOs
+-vector<int,-67,double,void *,5U,short &&>::X
+-vector<int,-67,double,void *,5U,short &&>::X
+-#
+---format=hp --no-params
+-elem__6vectorXTiA3foo__Fi
+-vector<int,&foo>::elem(int)
+-vector<int,&foo>::elem
+-#
+---format=hp --no-params
+-elem__6vectorXTiA3fooTPvA5Label__FiPPvT2
+-vector<int,&foo,void *,&Label>::elem(int, void **, void **)
+-vector<int,&foo,void *,&Label>::elem
+-#
+---format=hp --no-params
+-elem__6vectorXTiSN42A3foo__Fi
+-vector<int,-42,&foo>::elem(int)
+-vector<int,-42,&foo>::elem
+-#
+---format=hp --no-params
+-__ct__2T5XTPFcPv_i__Fi_2
+-T5<int (*)(char, void *)>::T5(int)
+-T5<int (*)(char, void *)>::T5
+-#
+---format=hp --no-params
+-__ct__2T5XTPFcPv_i__Fi_19
+-T5<int (*)(char, void *)>::T5(int)
+-T5<int (*)(char, void *)>::T5
+-#
+---format=hp --no-params
+-f__FicdPcPFci_v_34
+-f(int, char, double, char *, void (*)(char, int))
+-f
++
++0__GT50000000000_
++0__GT50000000000_
+ #
+---format=hp --no-params
+-spec__13Spec<#1,#1.*>XTiTPi_FPi
+-Spec<int,int *>::spec(int *)
+-Spec<int,int *>::spec
+-#
+---format=hp --no-params
+-spec__16Spec<#1,#1.&,#1>XTiTRiTi_FPi
+-Spec<int,int &,int>::spec(int *)
+-Spec<int,int &,int>::spec
+-#
+---format=hp --no-params
+-spec__17Spec<#1,#1.&&,#1>XTiTOiTi_FPi
+-Spec<int,int &&,int>::spec(int *)
+-Spec<int,int &&,int>::spec
+-#
+---format=hp --no-params
+-add__XTc_FcT1
+-add<char>(char, char)
+-add<char>
+-#
+---format=hp --no-params
+-add__XTcSP9A5label_FcPPlT1
+-add<char,9,&label>(char, long **, char)
+-add<char,9,&label>
+-#
+---format=hp --no-params
+-add__XTPfTFPd_f_FcT1
+-add<float *,float (double *)>(char, char)
+-add<float *,float (double *)>
+-#
+---format=hp --no-params
+-unLink__12basic_stringXTcT18string_char_traitsXTc_T9allocator_Fv
+-basic_string<char,string_char_traits<char>,allocator>::unLink(void)
+-basic_string<char,string_char_traits<char>,allocator>::unLink
+-#
+-# A regression test with no args.  This used to cause a segv.
+ 
+-_Utf390_1__1_9223372036854775807__9223372036854775
+-_Utf390_1__1_9223372036854775807__9223372036854775
+-#
+---format=gnu --no-params
+-call__H1Z4Test_RX01_t1C2ZX01PMX01FPX01i_vQ2X016output
+-C<Test, Test::output> call<Test>(Test &)
+-C<Test, Test::output> call<Test>
+-#
+---format=gnu --no-params
+-call__H1Z4Test_OX01_t1C2ZX01PMX01FPX01i_vQ2X016output
+-C<Test, Test::output> call<Test>(Test &&)
+-C<Test, Test::output> call<Test>
+-#
+---format=gnu --no-params
+-fn__FPQ21n1cPMQ21n1cFPQ21n1c_i
+-fn(n::c *, int (n::c::*)(n::c *))
+-fn
+-#
+---format=gnu --no-params
+-f__FGt3Bar1i2G1i
+-f(Bar<2>, i)
+-f
++__t2m05B500000000000000000_
++__t2m05B500000000000000000_
++##
++## Tests stack overflow PR71696
+ #
+---format=gnu --no-params
+-f__FGt3Bar1i21i
+-f(Bar<21>, int)
+-f
++#__10%0__S4_0T0T0
++#%0<>::%0(%0<>)
++# Could crash
++
++_
++_
++# Could crash
++
++_vt
++_vt
++# Could crash
++
++_$_1Acitz
++_$_1Acitz
++# Could crash
++
++_$_H1R
++_$_H1R
++# Could crash
++
++_Q8ccQ4M2e.
++_Q8ccQ4M2e.
+ #
+---format=gnu --no-params
+-f__FGt3Bar1i2G4XY_t
+-f(Bar<2>, XY_t)
+-f
++# demangler/80513 Test for bogus characters after __thunk_
++
++__thunk_16a_$_1x
++__thunk_16a_$_1x
+ #
+---format=gnu --no-params
+-foo__H1Zt2TA2ZRCiZt2NA1Ui9_X01_i
+-int foo<TA<int const &, NA<9> > >(TA<int const &, NA<9> >)
+-int foo<TA<int const &, NA<9> > >
+-#
+---format=gnu --no-params
+-foo__H1Zt2TA2ZOCiZt2NA1Ui9_X01_i
+-int foo<TA<int const &&, NA<9> > >(TA<int const &&, NA<9> >)
+-int foo<TA<int const &&, NA<9> > >
+-#
+---format=gnu --no-params
+-foo__H1Zt2TA2ZcZt2NA1Ui20_X01_i
+-int foo<TA<char, NA<20> > >(TA<char, NA<20> >)
+-int foo<TA<char, NA<20> > >
+-#
+---format=gnu --no-params
+-foo__H1Zt2TA2ZiZt8N___A___1Ui99_X01_i
+-int foo<TA<int, N___A___<99> > >(TA<int, N___A___<99> >)
+-int foo<TA<int, N___A___<99> > >
+-#
+---format=gnu --no-params
+-foo__H1Zt2TA2ZRCiZt2NA1im1_X01_i
+-int foo<TA<int const &, NA<-1> > >(TA<int const &, NA<-1> >)
+-int foo<TA<int const &, NA<-1> > >
+-#
+---format=gnu --no-params
+-foo__H1Zt2TA2ZRCiZt2NA1im9_X01_i
+-int foo<TA<int const &, NA<-9> > >(TA<int const &, NA<-9> >)
+-int foo<TA<int const &, NA<-9> > >
+-#
+---format=gnu --no-params
+-foo__H1Zt2TA2ZcZt2NA1i_m20__X01_i
+-int foo<TA<char, NA<-20> > >(TA<char, NA<-20> >)
+-int foo<TA<char, NA<-20> > >
+-#
+---format=gnu --no-params
+-foo__H1Zt2TA2ZcZt2NA1im1_X01_i
+-int foo<TA<char, NA<-1> > >(TA<char, NA<-1> >)
+-int foo<TA<char, NA<-1> > >
+-#
+---format=gnu --no-params
+-foo__H1Zt2TA2ZiZt4N__A1im9_X01_i
+-int foo<TA<int, N__A<-9> > >(TA<int, N__A<-9> >)
+-int foo<TA<int, N__A<-9> > >
+-#
+---format=gnu --no-params
+-foo__H1Zt2TA2ZiZt4N__A1i_m99__X01_i
+-int foo<TA<int, N__A<-99> > >(TA<int, N__A<-99> >)
+-int foo<TA<int, N__A<-99> > >
+-#
+---format=gnu --no-params
+-__opi__t2TA2ZiZt4N__A1i9
+-TA<int, N__A<9> >::operator int(void)
+-TA<int, N__A<9> >::operator int
+-#
+---format=gnu --no-params
+-__opi__t2TA2ZiZt8N___A___1i_m99_
+-TA<int, N___A___<-99> >::operator int(void)
+-TA<int, N___A___<-99> >::operator int
+-#
+---format=gnu --no-params
+-foo___bar__baz_____H1Zt2TA2ZiZt8N___A___1i99_X01_i
+-int foo___bar__baz___<TA<int, N___A___<99> > >(TA<int, N___A___<99> >)
+-int foo___bar__baz___<TA<int, N___A___<99> > >
+-#
+---format=gnu --no-params
+-foo__bar___foobar_____t2TA2ZiZt8N___A___1i_m99_
+-TA<int, N___A___<-99> >::foo__bar___foobar___(void)
+-TA<int, N___A___<-99> >::foo__bar___foobar___
+-#
+---format=gnu --no-params
+-foo__bar___foobar_____t2TA2ZiZt4N__A1i9
+-TA<int, N__A<9> >::foo__bar___foobar___(void)
+-TA<int, N__A<9> >::foo__bar___foobar___
+-#
+---format=gnu --no-params
+-__tfP8sockaddr
+-sockaddr * type_info function
+-sockaddr * type_info function
+-#
+---format=gnu --no-params
+-__tfPQ25libcwt16option_event_tct1Z12burst_app_ct
+-libcw::option_event_tct<burst_app_ct> * type_info function
+-libcw::option_event_tct<burst_app_ct> * type_info function
+-#
+---format=gnu --no-params
+-__tiP8sockaddr
+-sockaddr * type_info node
+-sockaddr * type_info node
+-#
+---format=gnu --no-params
+-__tiPQ25libcwt16option_event_tct1Z12burst_app_ct
+-libcw::option_event_tct<burst_app_ct> * type_info node
+-libcw::option_event_tct<burst_app_ct> * type_info node
+-#
+---format=gnu --no-params
+-_27_GLOBAL_.N.__12burst_app_ct.app_instance
+-{anonymous}::app_instance
+-{anonymous}::app_instance
+-#
+---format=gnu --no-params
+-_26_GLOBAL_$N$_tmp_n.iilg4Gya$app_instance
+-{anonymous}::app_instance
+-{anonymous}::app_instance
++# demangler/80513 Test for overflow in consume_count
++
++__thunk_4294967297__$_1x
++__thunk_4294967297__$_1x
+ #
+ --format=gnu-v3 --no-params
+ _Z3fo5n
+@@ -3368,7 +117,7 @@ java.util.Map$Entry.class$
+ _ZN3org7eclipse3cdt5debug8internal4core5model9CVariable6sizeof$Ev
+ org.eclipse.cdt.debug.internal.core.model.CVariable.sizeof()
+ #
+---format=hp --no-params
++--format=auto --no-params
+ _Utf58_0_1__1_2147483647__2147483648
+ _Utf58_0_1__1_2147483647__2147483648
+ _Utf58_0_1__1_2147483647__2147483648
+@@ -3904,24 +653,6 @@ _ZNSdD1Ev
+ _ZNSdD1Ev
+ 2
+ #
+-# This caused an infinite loop.
+-#
+-# This is generated by an EDG compiler (kcc 4.0).  To demangle it
+-# correctly, I believe that we have to understand that the J37J deep
+-# in the string somehow refers back to the type starting 37 characters
+-# in from some starting point, so that it winds up being the type
+-# starting with 41THandle....  However, lacking a spec for EDG
+-# demangling, it's hard to implement this.
+-#
+-# In the meantime, this symbol can be successfully demangled in GNU
+-# mode.  Of course the result is more or less nonsense, but an older
+-# version of g++ would indeed generate this mangled name given the
+-# appropriate input, so the demangling is correct.
+---format=auto --no-params
+-__CPR212____ct__Q3_3std141list__tm__128_Q2_3edm41THandle__tm__26_Q2_4emid15EMparticleChunkQ2_3std68allocator__tm__51_Q2_3edmJ37J14const_iteratorFRCQ3_3std18list__tm__7_Z1ZZ2Z8iterator
+-_Z1ZZ2Z::__CPR212____ct__Q3_3std141list__tm__128_Q2_3edm41THandle__tm__26_Q2_4emid15EMparticleChunkQ2_3std68allocator__tm__51_Q2_3edmJ37J14const_iteratorFRCQ3_3std18list__tm(iterator)
+-_Z1ZZ2Z::__CPR212____ct__Q3_3std141list__tm__128_Q2_3edm41THandle__tm__26_Q2_4emid15EMparticleChunkQ2_3std68allocator__tm__51_Q2_3edmJ37J14const_iteratorFRCQ3_3std18list__tm
+-#
+ # This used to cause a crash. It doesn't follow the C++ encoding so
+ # the demangled name should be identical to the original symbol name.
+ --format=auto --no-params
+@@ -4557,50 +1288,12 @@ void binary_left<1, 2, 3>(A<((42)+...+(1, 2, 3))>)
+ 
+ _Z12binary_rightIJLi1ELi2ELi3EEEv1AIXfRplT_Li42EEE
+ void binary_right<1, 2, 3>(A<((1, 2, 3)+...+(42))>)
+-#
+-# Tests a use-after-free problem PR70481
+-
+-_Q.__0
+-::Q.(void)
+-#
+-# Tests a use-after-free problem PR70481
+-
+-_Q10-__9cafebabe.
+-cafebabe.::-(void)
+-#
+-# Tests integer overflow problem PR70492
+-
+-__vt_90000000000cafebabe
+-__vt_90000000000cafebabe
+-#
+-# Tests write access violation PR70498
+-
+-_Z80800000000000000000000
+-_Z80800000000000000000000
+-#
+-# Tests write access violation PR70926
+-
+-0__Ot2m02R5T0000500000
+-0__Ot2m02R5T0000500000
+-#
+-
+-0__GT50000000000_
+-0__GT50000000000_
+-#
+-
+-__t2m05B500000000000000000_
+-__t2m05B500000000000000000_
+-#
+-# Tests stack overflow PR71696
+-
+-__10%0__S4_0T0T0
+-%0<>::%0(%0<>)
+-
+ # Inheriting constructor
++
+ _ZN1DCI11BEi
+ D::B(int)
+-
+ # exception-specification (C++17)
++
+ _Z1fIvJiELb0EEvPDOT1_EFT_DpT0_E
+ void f<void, int, false>(void (*)(int) noexcept(false))
+ 
+@@ -4610,26 +1303,6 @@ void f<void, int, false>(void (*)(int) noexcept)
+ _Z1fIvJiELb0EEvPDwiEFT_DpT0_E
+ void f<void, int, false>(void (*)(int) throw(int))
+ 
+-# Could crash
+-_
+-_
+-
+-# Could crash
+-_vt
+-_vt
+-
+-# Could crash
+-_$_1Acitz
+-_$_1Acitz
+-
+-# Could crash
+-_$_H1R
+-_$_H1R
+-
+-# Could crash
+-_Q8ccQ4M2e.
+-_Q8ccQ4M2e.
+-
+ # fold-expression with missing third component could crash.
+ _Z12binary_rightIJLi1ELi2ELi3EEEv1AIXfRplT_LiEEE
+ _Z12binary_rightIJLi1ELi2ELi3EEEv1AIXfRplT_LiEEE
+@@ -4732,16 +1405,6 @@ _Z1MA_aMMMMA_MMA_MMMMMMMMSt1MS_o11T0000000000t2M0oooozoooo
+ _Z4294967297x
+ _Z4294967297x
+ #
+-# demangler/80513 Test for bogus characters after __thunk_
+-
+-__thunk_16a_$_1x
+-__thunk_16a_$_1x
+-#
+-# demangler/80513 Test for overflow in consume_count
+-
+-__thunk_4294967297__$_1x
+-__thunk_4294967297__$_1x
+-#
+ # demangler/82195 members of lambdas
+ --no-params
+ _ZZZ3FoovENKUlT_E_clIiEEfS_EN5Local2fnEv
diff --git a/SPECS/gcc.spec b/SPECS/gcc.spec
index 47b03f0..d2ed810 100644
--- a/SPECS/gcc.spec
+++ b/SPECS/gcc.spec
@@ -1,10 +1,10 @@
-%global DATE 20200928
-%global gitrev 8ed81e8ef69a535cbc168f55d06941bf3e4ef8ee
+%global DATE 20210423
+%global gitrev 81036e6dfb5dac2e9186f0071f7f2048e81e65fa
 %global gcc_version 8.4.1
 %global gcc_major 8
 # Note, gcc_release must be integer, if you want to add suffixes to
 # %%{release}, append them after %%{gcc_release} on Release: line.
-%global gcc_release 1
+%global gcc_release 2
 %global nvptx_tools_gitrev c28050f60193b3b95a18866a96f03334e874e78f
 %global nvptx_newlib_gitrev aadc8eb0ec43b7cd0dd2dfb484bae63c8b05ef24
 %global _unpackaged_files_terminate_build 0
@@ -104,7 +104,7 @@
 Summary: Various compilers (C, C++, Objective-C, ...)
 Name: gcc
 Version: %{gcc_version}
-Release: %{gcc_release}%{?dist}
+Release: %{gcc_release}.1%{?dist}
 # libgcc, libgfortran, libgomp, libstdc++ and crtstuff have
 # GCC Runtime Exception.
 License: GPLv3+ and GPLv3+ with exceptions and GPLv2+ with exceptions and LGPLv2+ and BSD
@@ -272,7 +272,7 @@ Patch14: gcc8-libgcc-hardened.patch
 Patch15: gcc8-rh1670535.patch
 Patch16: gcc8-libgomp-20190503.patch
 Patch17: gcc8-libgomp-testsuite.patch
-Patch18: gcc8-pr95614-revert.patch
+Patch18: gcc8-remove-old-demangle.patch
 
 Patch30: gcc8-rh1668903-1.patch
 Patch31: gcc8-rh1668903-2.patch
@@ -849,8 +849,7 @@ to NVidia PTX capable devices if available.
 %patch15 -p0 -b .rh1670535~
 %patch16 -p0 -b .libgomp-20190503~
 %patch17 -p0 -b .libgomp-testsuite~
-%patch18 -p0 -b .pr95614-revert~
-rm -f gcc/testsuite/gfortran.dg/pr95614_*.f90
+%patch18 -p0 -b .demangle~
 
 %patch30 -p0 -b .rh1668903-1~
 %patch31 -p0 -b .rh1668903-2~
@@ -3167,6 +3166,18 @@ fi
 %endif
 
 %changelog
+* Mon Apr 26 2021 Marek Polacek <polacek@redhat.com> 8.4.1-2.1
+- remove support for demangling GCC 2.x era mangling schemes (#1668394)
+
+* Fri Apr 23 2021 Marek Polacek <polacek@redhat.com> 8.4.1-2
+- update from GCC 8.4 release (#1946758)
+
+* Fri Apr  9 2021 Marek Polacek <polacek@redhat.com> 8.4.1-1.2
+- back out the PR97236 patch
+
+* Fri Apr  9 2021 Marek Polacek <polacek@redhat.com> 8.4.1-1.1
+- fix bad use of VMAT_CONTIGUOUS (PR tree-optimization/97236, #1925632)
+
 * Tue Sep 29 2020 Marek Polacek <polacek@redhat.com> 8.4.1-1
 - update from GCC 8.4 release (#1868446)
 - remove symlinks to 32-bit versions of these static libraries: libasan.a,