From 21cf5aadf26305ccbd4de25462648344602643e9 Mon Sep 17 00:00:00 2001 From: Laszlo Ersek Date: Mon, 2 May 2022 10:56:00 +0200 Subject: [PATCH] guestfs_readdir(): rewrite with FileOut transfer, to lift protocol limit Currently the guestfs_readdir() API can not list long directories, due to it sending back the whole directory listing in a single guestfs protocol response, which is limited to GUESTFS_MESSAGE_MAX (approx. 4MB) in size. Introduce the "internal_readdir" action, for transferring the directory listing from the daemon to the library through a FileOut parameter. Rewrite guestfs_readdir() on top of this new internal function: - The new "internal_readdir" action is a daemon action. Do not repurpose the "readdir" proc_nr (138) for "internal_readdir", as some distros ship the binary appliance to their users, and reusing the proc_nr could create a mismatch between library & appliance with obscure symptoms. Replace the old proc_nr (138) with a new proc_nr (511) instead; a mismatch would then produce a clear error message. Assume the new action will first be released in libguestfs-1.48.2. - Turn "readdir" from a daemon action into a non-daemon one. Call the daemon action guestfs_internal_readdir() manually, receive the FileOut parameter into a temp file, then deserialize the dirents array from the temp file. This patch sneakily fixes an independent bug, too. In the pre-patch do_readdir() function [daemon/readdir.c], when readdir() returns NULL, we don't distinguish "end of directory stream" from "readdir() failed". This rewrite fixes this problem -- I didn't see much value separating out the fix for the original do_readdir(). Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1674392 Signed-off-by: Laszlo Ersek Message-Id: <20220502085601.15012-2-lersek@redhat.com> Reviewed-by: Richard W.M. Jones (cherry picked from commit 45b7f1736b64e9f0741e21e5a9d83a837bd863bf) --- TODO | 8 --- daemon/readdir.c | 132 +++++++++++++++++++------------------- generator/actions_core.ml | 127 +++++++++++++++++++----------------- generator/proc_nr.ml | 2 +- lib/MAX_PROC_NR | 2 +- lib/Makefile.am | 1 + lib/readdir.c | 131 +++++++++++++++++++++++++++++++++++++ 7 files changed, 267 insertions(+), 136 deletions(-) create mode 100644 lib/readdir.c diff --git a/TODO b/TODO index a50f7d73c..513e55f92 100644 --- a/TODO +++ b/TODO @@ -484,14 +484,6 @@ this approach works, it doesn't solve the MBR problem, so likely we'd have to write a library for that (or perhaps go back to sfdisk but using a very abstracted interface over sfdisk). -Reimplement some APIs to avoid protocol limits ----------------------------------------------- - -Mostly this item was done (eg. commits a69f44f56f and before). The -most notable API with a protocol limit remaining is: - - - guestfs_readdir - hivex ----- diff --git a/daemon/readdir.c b/daemon/readdir.c index e488f93e7..9ab0b0aec 100644 --- a/daemon/readdir.c +++ b/daemon/readdir.c @@ -16,77 +16,67 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. */ -#include +#include /* HAVE_STRUCT_DIRENT_D_TYPE */ -#include -#include -#include -#include -#include +#include /* readdir() */ +#include /* errno */ +#include /* xdrmem_create() */ +#include /* perror() */ +#include /* malloc() */ +#include /* opendir() */ -#include "daemon.h" -#include "actions.h" +#include "daemon.h" /* reply_with_perror() */ -static void -free_int_dirent_list (guestfs_int_dirent *p, size_t len) +/* Has one FileOut parameter. */ +int +do_internal_readdir (const char *dir) { - size_t i; + int ret; + DIR *dirstream; + void *xdr_buf; + XDR xdr; - for (i = 0; i < len; ++i) { - free (p[i].name); - } - free (p); -} - -guestfs_int_dirent_list * -do_readdir (const char *path) -{ - guestfs_int_dirent_list *ret; - guestfs_int_dirent v; - DIR *dir; - struct dirent *d; - size_t i; - - ret = malloc (sizeof *ret); - if (ret == NULL) { - reply_with_perror ("malloc"); - return NULL; - } - - ret->guestfs_int_dirent_list_len = 0; - ret->guestfs_int_dirent_list_val = NULL; + /* Prepare to fail. */ + ret = -1; CHROOT_IN; - dir = opendir (path); + dirstream = opendir (dir); CHROOT_OUT; - if (dir == NULL) { - reply_with_perror ("opendir: %s", path); - free (ret); - return NULL; + if (dirstream == NULL) { + reply_with_perror ("opendir: %s", dir); + return ret; } - i = 0; - while ((d = readdir (dir)) != NULL) { - guestfs_int_dirent *p; + xdr_buf = malloc (GUESTFS_MAX_CHUNK_SIZE); + if (xdr_buf == NULL) { + reply_with_perror ("malloc"); + goto close_dir; + } + xdrmem_create (&xdr, xdr_buf, GUESTFS_MAX_CHUNK_SIZE, XDR_ENCODE); + + /* Send an "OK" reply, before starting the file transfer. */ + reply (NULL, NULL); + + /* From this point on, we can only report errors by canceling the file + * transfer. + */ + for (;;) { + struct dirent *d; + guestfs_int_dirent v; + + errno = 0; + d = readdir (dirstream); + if (d == NULL) { + if (errno == 0) + ret = 0; + else + perror ("readdir"); - p = realloc (ret->guestfs_int_dirent_list_val, - sizeof (guestfs_int_dirent) * (i+1)); - v.name = strdup (d->d_name); - if (!p || !v.name) { - reply_with_perror ("allocate"); - if (p) { - free_int_dirent_list (p, i); - } else { - free_int_dirent_list (ret->guestfs_int_dirent_list_val, i); - } - free (v.name); - free (ret); - closedir (dir); - return NULL; + break; } - ret->guestfs_int_dirent_list_val = p; + v.name = d->d_name; v.ino = d->d_ino; #ifdef HAVE_STRUCT_DIRENT_D_TYPE switch (d->d_type) { @@ -104,19 +94,29 @@ do_readdir (const char *path) v.ftyp = 'u'; #endif - ret->guestfs_int_dirent_list_val[i] = v; + if (!xdr_guestfs_int_dirent (&xdr, &v)) { + fprintf (stderr, "xdr_guestfs_int_dirent failed\n"); + break; + } - i++; + if (send_file_write (xdr_buf, xdr_getpos (&xdr)) != 0) + break; + + xdr_setpos (&xdr, 0); } - ret->guestfs_int_dirent_list_len = i; + /* Finish or cancel the transfer. Note that if (ret == -1) because the library + * canceled, we still need to cancel back! + */ + send_file_end (ret == -1); - if (closedir (dir) == -1) { - reply_with_perror ("closedir"); - free (ret->guestfs_int_dirent_list_val); - free (ret); - return NULL; - } + xdr_destroy (&xdr); + free (xdr_buf); + +close_dir: + if (closedir (dirstream) == -1) + /* Best we can do here is log an error. */ + perror ("closedir"); return ret; } diff --git a/generator/actions_core.ml b/generator/actions_core.ml index dc12fdc33..807150615 100644 --- a/generator/actions_core.ml +++ b/generator/actions_core.ml @@ -141,6 +141,66 @@ only useful for printing debug and internal error messages. For more information on states, see L." }; + { defaults with + name = "readdir"; added = (1, 0, 55); + style = RStructList ("entries", "dirent"), [String (Pathname, "dir")], []; + progress = true; cancellable = true; + shortdesc = "read directories entries"; + longdesc = "\ +This returns the list of directory entries in directory C. + +All entries in the directory are returned, including C<.> and +C<..>. The entries are I sorted, but returned in the same +order as the underlying filesystem. + +Also this call returns basic file type information about each +file. The C field will contain one of the following characters: + +=over 4 + +=item 'b' + +Block special + +=item 'c' + +Char special + +=item 'd' + +Directory + +=item 'f' + +FIFO (named pipe) + +=item 'l' + +Symbolic link + +=item 'r' + +Regular file + +=item 's' + +Socket + +=item 'u' + +Unknown file type + +=item '?' + +The L call returned a C field with an +unexpected value + +=back + +This function is primarily intended for use by programs. To +get a simple list of names, use C. To get a printable +directory for human consumption, use C." }; + { defaults with name = "version"; added = (1, 0, 58); style = RStruct ("version", "version"), [], []; @@ -3939,66 +3999,6 @@ L, C, C. This call returns the previous umask." }; - { defaults with - name = "readdir"; added = (1, 0, 55); - style = RStructList ("entries", "dirent"), [String (Pathname, "dir")], []; - protocol_limit_warning = true; - shortdesc = "read directories entries"; - longdesc = "\ -This returns the list of directory entries in directory C. - -All entries in the directory are returned, including C<.> and -C<..>. The entries are I sorted, but returned in the same -order as the underlying filesystem. - -Also this call returns basic file type information about each -file. The C field will contain one of the following characters: - -=over 4 - -=item 'b' - -Block special - -=item 'c' - -Char special - -=item 'd' - -Directory - -=item 'f' - -FIFO (named pipe) - -=item 'l' - -Symbolic link - -=item 'r' - -Regular file - -=item 's' - -Socket - -=item 'u' - -Unknown file type - -=item '?' - -The L call returned a C field with an -unexpected value - -=back - -This function is primarily intended for use by programs. To -get a simple list of names, use C. To get a printable -directory for human consumption, use C." }; - { defaults with name = "getxattrs"; added = (1, 0, 59); style = RStructList ("xattrs", "xattr"), [String (Pathname, "path")], []; @@ -9713,4 +9713,11 @@ C. The C parameter must be the name of the mapping device (ie. F) and I the name of the underlying block device." }; + { defaults with + name = "internal_readdir"; added = (1, 48, 2); + style = RErr, [String (Pathname, "dir"); String (FileOut, "filename")], []; + visibility = VInternal; + shortdesc = "read directories entries"; + longdesc = "Internal function for readdir." }; + ] diff --git a/generator/proc_nr.ml b/generator/proc_nr.ml index b20672ff0..bdced51c9 100644 --- a/generator/proc_nr.ml +++ b/generator/proc_nr.ml @@ -152,7 +152,6 @@ let proc_nr = [ 135, "mknod_b"; 136, "mknod_c"; 137, "umask"; -138, "readdir"; 139, "sfdiskM"; 140, "zfile"; 141, "getxattrs"; @@ -514,6 +513,7 @@ let proc_nr = [ 508, "cryptsetup_open"; 509, "cryptsetup_close"; 510, "internal_list_rpm_applications"; +511, "internal_readdir"; ] (* End of list. If adding a new entry, add it at the end of the list diff --git a/lib/MAX_PROC_NR b/lib/MAX_PROC_NR index 2bc4cd64b..c0556fb20 100644 --- a/lib/MAX_PROC_NR +++ b/lib/MAX_PROC_NR @@ -1 +1 @@ -510 +511 diff --git a/lib/Makefile.am b/lib/Makefile.am index 144c45588..212bcb94a 100644 --- a/lib/Makefile.am +++ b/lib/Makefile.am @@ -105,6 +105,7 @@ libguestfs_la_SOURCES = \ private-data.c \ proto.c \ qemu.c \ + readdir.c \ rescue.c \ stringsbuf.c \ structs-compare.c \ diff --git a/lib/readdir.c b/lib/readdir.c new file mode 100644 index 000000000..9cb0d7cf6 --- /dev/null +++ b/lib/readdir.c @@ -0,0 +1,131 @@ +/* libguestfs + * Copyright (C) 2016-2022 Red Hat Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include /* UNIX_PATH_MAX, needed by "guestfs-internal.h" */ + +#include /* xdrstdio_create() */ +#include /* UINT32_MAX */ +#include /* fopen() */ +#include /* memset() */ + +#include "guestfs.h" /* guestfs_internal_readdir() */ +#include "guestfs_protocol.h" /* guestfs_int_dirent */ +#include "guestfs-internal.h" /* guestfs_int_make_temp_path() */ +#include "guestfs-internal-actions.h" /* guestfs_impl_readdir */ + +struct guestfs_dirent_list * +guestfs_impl_readdir (guestfs_h *g, const char *dir) +{ + struct guestfs_dirent_list *ret; + char *tmpfn; + FILE *f; + off_t fsize; + XDR xdr; + struct guestfs_dirent_list *dirents; + uint32_t alloc_entries; + size_t alloc_bytes; + + /* Prepare to fail. */ + ret = NULL; + + tmpfn = guestfs_int_make_temp_path (g, "readdir", NULL); + if (tmpfn == NULL) + return ret; + + if (guestfs_internal_readdir (g, dir, tmpfn) == -1) + goto drop_tmpfile; + + f = fopen (tmpfn, "r"); + if (f == NULL) { + perrorf (g, "fopen: %s", tmpfn); + goto drop_tmpfile; + } + + if (fseeko (f, 0, SEEK_END) == -1) { + perrorf (g, "fseeko"); + goto close_tmpfile; + } + fsize = ftello (f); + if (fsize == -1) { + perrorf (g, "ftello"); + goto close_tmpfile; + } + if (fseeko (f, 0, SEEK_SET) == -1) { + perrorf (g, "fseeko"); + goto close_tmpfile; + } + + xdrstdio_create (&xdr, f, XDR_DECODE); + + dirents = safe_malloc (g, sizeof *dirents); + dirents->len = 0; + alloc_entries = 8; + alloc_bytes = alloc_entries * sizeof *dirents->val; + dirents->val = safe_malloc (g, alloc_bytes); + + while (xdr_getpos (&xdr) < fsize) { + guestfs_int_dirent v; + struct guestfs_dirent *d; + + if (dirents->len == alloc_entries) { + if (alloc_entries > UINT32_MAX / 2 || alloc_bytes > (size_t)-1 / 2) { + error (g, "integer overflow"); + goto free_dirents; + } + alloc_entries *= 2u; + alloc_bytes *= 2u; + dirents->val = safe_realloc (g, dirents->val, alloc_bytes); + } + + /* Decoding does not work unless the target buffer is zero-initialized. */ + memset (&v, 0, sizeof v); + if (!xdr_guestfs_int_dirent (&xdr, &v)) { + error (g, "xdr_guestfs_int_dirent failed"); + goto free_dirents; + } + + d = &dirents->val[dirents->len]; + d->ino = v.ino; + d->ftyp = v.ftyp; + d->name = v.name; /* transfer malloc'd string to "d" */ + + dirents->len++; + } + + /* Success; transfer "dirents" to "ret". */ + ret = dirents; + dirents = NULL; + + /* Clean up. */ + xdr_destroy (&xdr); + +free_dirents: + guestfs_free_dirent_list (dirents); + +close_tmpfile: + fclose (f); + +drop_tmpfile: + /* In case guestfs_internal_readdir() failed, it may or may not have created + * the temporary file. + */ + unlink (tmpfn); + free (tmpfn); + + return ret; +} -- 2.31.1