From fcd169476f403f9f4f65dddd7c4f05a76b35baeb Mon Sep 17 00:00:00 2001 From: "Richard W.M. Jones" Date: Sat, 10 May 2025 18:29:40 +0100 Subject: [PATCH] Update common submodule Adds these commits: Richard W.M. Jones (5): mlstdutils: Modify List.take, List.drop to match OCaml 5.3 mlstdutils: Rename List.dropwhile -> drop_while, takewhile -> take_while mlstdutils: Add List.last function mlstdutils: Move List module first mlstdutils: Add String.common_prefix, longest_common_prefix --- common | 2 +- contrib/visualize-alignment/tracetops.ml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) Submodule common aa797fa13..4c7ae1581: diff --git a/common/mlstdutils/std_utils.ml b/common/mlstdutils/std_utils.ml index 8462752..159fc4a 100644 --- a/common/mlstdutils/std_utils.ml +++ b/common/mlstdutils/std_utils.ml @@ -18,6 +18,138 @@ open Printf +module List = struct + include List + + (* Drop elements from a list while a predicate is true. *) + let rec drop_while f = function + | [] -> [] + | x :: xs when f x -> drop_while f xs + | xs -> xs + + (* Take elements from a list while a predicate is true. *) + let rec take_while f = function + | x :: xs when f x -> x :: take_while f xs + | _ -> [] + + let take n xs = + if n < 0 then invalid_arg "List.take" + else if n = 0 then [] + else ( + (* This optimisation avoids copying xs. *) + let len = List.length xs in + if len <= n then xs + else ( + let rec take n = function + | x :: xs when n >= 1 -> x :: take (n-1) xs + | _ -> [] + in + take n xs + ) + ) + let rec drop n xs = + if n < 0 then invalid_arg "List.drop" + else if n = 0 then xs + else if xs = [] then [] + else drop (n-1) (List.tl xs) + + let rec last = function + | [] -> invalid_arg "List.last" + | [x] -> x + | _ :: xs -> last xs + + let rec filter_map f = function + | [] -> [] + | x :: xs -> + match f x with + | Some y -> y :: filter_map f xs + | None -> filter_map f xs + + let rec find_map f = function + | [] -> raise Not_found + | x :: xs -> + match f x with + | Some y -> y + | None -> find_map f xs + + let rec group_by = function + | [] -> [] + | (day1, x1) :: (day2, x2) :: rest when day1 = day2 -> + let rest = group_by ((day2, x2) :: rest) in + let day, xs = List.hd rest in + (day, x1 :: xs) :: List.tl rest + | (day, x) :: rest -> + (day, [x]) :: group_by rest + + let rec combine3 xs ys zs = + match xs, ys, zs with + | [], [], [] -> [] + | x::xs, y::ys, z::zs -> (x, y, z) :: combine3 xs ys zs + | _ -> invalid_arg "combine3" + + let rec assoc_lbl ?(cmp = Stdlib.compare) ~default x = function + | [] -> default + | (y, y') :: _ when cmp x y = 0 -> y' + | _ :: ys -> assoc_lbl ~cmp ~default x ys + + let uniq ?(cmp = Stdlib.compare) xs = + let rec loop acc = function + | [] -> acc + | [x] -> x :: acc + | x :: (y :: _ as xs) when cmp x y = 0 -> + loop acc xs + | x :: (y :: _ as xs) -> + loop (x :: acc) xs + in + List.rev (loop [] xs) + + let remove_duplicates xs = + let h = Hashtbl.create (List.length xs) in + let rec loop = function + | [] -> [] + | x :: xs when Hashtbl.mem h x -> xs + | x :: xs -> Hashtbl.add h x true; x :: loop xs + in + loop xs + + let push_back xsp x = xsp := !xsp @ [x] + let push_front x xsp = xsp := x :: !xsp + let pop_back xsp = + let x, xs = + match List.rev !xsp with + | x :: xs -> x, xs + | [] -> failwith "pop" in + xsp := List.rev xs; + x + let pop_front xsp = + let x, xs = + match !xsp with + | x :: xs -> x, xs + | [] -> failwith "shift" in + xsp := xs; + x + + let may_push_back xsp x = + match x with None -> () | Some x -> push_back xsp x + let may_push_front x xsp = + match x with None -> () | Some x -> push_front x xsp + + let push_back_list xsp xs = xsp := !xsp @ xs + let push_front_list xs xsp = xsp := xs @ !xsp + + let make n x = + let rec loop acc = function + | 0 -> acc + | i when i > 0 -> loop (x :: acc) (i-1) + | _ -> invalid_arg "make" + in + loop [] n + + let same = function + | [] -> true + | x :: xs -> List.for_all ((=) x) xs +end + module Char = struct include Char @@ -302,131 +434,23 @@ module String = struct loop 0 let unix2dos str = replace str "\n" "\r\n" -end -module List = struct - include List - - (* Drop elements from a list while a predicate is true. *) - let rec dropwhile f = function - | [] -> [] - | x :: xs when f x -> dropwhile f xs - | xs -> xs - - (* Take elements from a list while a predicate is true. *) - let rec takewhile f = function - | x :: xs when f x -> x :: takewhile f xs - | _ -> [] - - let take n xs = - if n <= 0 then [] - else ( - (* This optimisation avoids copying xs. *) - let len = List.length xs in - if len <= n then xs - else ( - let rec take n = function - | x :: xs when n >= 1 -> x :: take (n-1) xs - | _ -> [] - in - take n xs - ) - ) - let rec drop n xs = - if n <= 0 then xs - else if xs = [] then [] - else drop (n-1) (List.tl xs) - - let rec filter_map f = function - | [] -> [] - | x :: xs -> - match f x with - | Some y -> y :: filter_map f xs - | None -> filter_map f xs - - let rec find_map f = function - | [] -> raise Not_found - | x :: xs -> - match f x with - | Some y -> y - | None -> find_map f xs - - let rec group_by = function - | [] -> [] - | (day1, x1) :: (day2, x2) :: rest when day1 = day2 -> - let rest = group_by ((day2, x2) :: rest) in - let day, xs = List.hd rest in - (day, x1 :: xs) :: List.tl rest - | (day, x) :: rest -> - (day, [x]) :: group_by rest - - let rec combine3 xs ys zs = - match xs, ys, zs with - | [], [], [] -> [] - | x::xs, y::ys, z::zs -> (x, y, z) :: combine3 xs ys zs - | _ -> invalid_arg "combine3" - - let rec assoc_lbl ?(cmp = Stdlib.compare) ~default x = function - | [] -> default - | (y, y') :: _ when cmp x y = 0 -> y' - | _ :: ys -> assoc_lbl ~cmp ~default x ys - - let uniq ?(cmp = Stdlib.compare) xs = - let rec loop acc = function - | [] -> acc - | [x] -> x :: acc - | x :: (y :: _ as xs) when cmp x y = 0 -> - loop acc xs - | x :: (y :: _ as xs) -> - loop (x :: acc) xs + let rec longest_common_prefix = function + | [] -> "" + | [s] -> s + | strs -> + let strs = List.sort compare strs in + let s1 = List.hd strs and s2 = List.last strs in + common_prefix s1 s2 + and common_prefix s1 s2 = + let n1 = length s1 and n2 = length s2 in + let n = min n1 n2 in + let rec loop i = + if i = n then sub s1 0 n + else if unsafe_get s1 i <> unsafe_get s2 i then sub s1 0 i + else loop (i+1) in - List.rev (loop [] xs) - - let remove_duplicates xs = - let h = Hashtbl.create (List.length xs) in - let rec loop = function - | [] -> [] - | x :: xs when Hashtbl.mem h x -> xs - | x :: xs -> Hashtbl.add h x true; x :: loop xs - in - loop xs - - let push_back xsp x = xsp := !xsp @ [x] - let push_front x xsp = xsp := x :: !xsp - let pop_back xsp = - let x, xs = - match List.rev !xsp with - | x :: xs -> x, xs - | [] -> failwith "pop" in - xsp := List.rev xs; - x - let pop_front xsp = - let x, xs = - match !xsp with - | x :: xs -> x, xs - | [] -> failwith "shift" in - xsp := xs; - x - - let may_push_back xsp x = - match x with None -> () | Some x -> push_back xsp x - let may_push_front x xsp = - match x with None -> () | Some x -> push_front x xsp - - let push_back_list xsp xs = xsp := !xsp @ xs - let push_front_list xs xsp = xsp := xs @ !xsp - - let make n x = - let rec loop acc = function - | 0 -> acc - | i when i > 0 -> loop (x :: acc) (i-1) - | _ -> invalid_arg "make" - in - loop [] n - - let same = function - | [] -> true - | x :: xs -> List.for_all ((=) x) xs + loop 0 end let (//) = Filename.concat diff --git a/common/mlstdutils/std_utils.mli b/common/mlstdutils/std_utils.mli index 566c5b5..2526100 100644 --- a/common/mlstdutils/std_utils.mli +++ b/common/mlstdutils/std_utils.mli @@ -16,138 +16,6 @@ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. *) -module Char : sig - type t = char - val chr : int -> char - val code : char -> int - val compare: t -> t -> int - val escaped : char -> string - val unsafe_chr : int -> char - - val lowercase_ascii : char -> char - val uppercase_ascii : char -> char - - val isspace : char -> bool - (** Return true if char is a whitespace character. *) - val isdigit : char -> bool - (** Return true if the character is a digit [[0-9]]. *) - val isxdigit : char -> bool - (** Return true if the character is a hex digit [[0-9a-fA-F]]. *) - val isalpha : char -> bool - (** Return true if the character is a US ASCII 7 bit alphabetic. *) - val isalnum : char -> bool - (** Return true if the character is a US ASCII 7 bit alphanumeric. *) - - val hexdigit : char -> int - (** Return the value of a hex digit. If the char is not in - the set [[0-9a-fA-F]] then this returns [-1]. *) - - val mem : char -> string -> bool - (** [mem c str] returns true if the byte [c] is contained in [str]. - - This is actually the same as {!String.contains} with the - parameters reversed. *) -end -(** Override the Char module from stdlib. *) - -module String : sig - type t = string - val compare: t -> t -> int - val concat : string -> string list -> string - val contains : string -> char -> bool - val contains_from : string -> int -> char -> bool - val escaped : string -> string - val get : string -> int -> char - val index : string -> char -> int - val index_from : string -> int -> char -> int - val iter : (char -> unit) -> string -> unit - val iteri : (int -> char -> unit) -> string -> unit - val map : (char -> char) -> string -> string - val length : string -> int - val make : int -> char -> string - val rcontains_from : string -> int -> char -> bool - val rindex : string -> char -> int - val rindex_from : string -> int -> char -> int - val sub : string -> int -> int -> string - val unsafe_get : string -> int -> char - - val lowercase_ascii : string -> string - val uppercase_ascii : string -> string - val capitalize_ascii : string -> string - - val is_prefix : string -> string -> bool - (** [is_prefix str prefix] returns true if [prefix] is a prefix of [str]. *) - val is_suffix : string -> string -> bool - (** [is_suffix str suffix] returns true if [suffix] is a suffix of [str]. *) - val find : string -> string -> int - (** [find str sub] searches for [sub] as a substring of [str]. If - found it returns the index. If not found, it returns [-1]. *) - val find_from : string -> int -> string -> int - (** [find_from str start sub] searches for [sub] as a substring of [str], - starting at index [start]. If found it returns the index. - If not found, it returns [-1]. *) - val replace : string -> string -> string -> string - (** [replace str s1 s2] replaces all instances of [s1] appearing in - [str] with [s2]. *) - val replace_char : string -> char -> char -> string - (** Replace character in string. *) - val break : int -> string -> string * string - (** [break n str] breaks a string at the nth byte, returning the - first and second parts. If [n] is beyond the end of the - string it returns [(str, "")]. *) - val split : string -> string -> string * string - (** [split sep str] splits [str] at the first occurrence of the - separator [sep], returning the part before and the part after. - If separator is not found, return the whole string and an - empty string. *) - val nsplit : ?max:int -> string -> string -> string list - (** [nsplit ?max sep str] splits [str] into multiple strings at each - separator [sep]. - - As with the Perl split function, you can give an optional - [?max] parameter to limit the number of strings returned. The - final element of the list will contain the remainder of the - input string. *) - val lines_split : string -> string list - (** [lines_split str] splits [str] into lines, keeping continuation - characters (i.e. [\] at the end of lines) into account. *) - val random8 : unit -> string - (** Return a string of 8 random printable characters. *) - val triml : ?test:(char -> bool) -> string -> string - (** Trim left. *) - val trimr : ?test:(char -> bool) -> string -> string - (** Trim right. *) - val trim : ?test:(char -> bool) -> string -> string - (** Trim left and right. *) - val chomp : string -> string - (** If the string ends with [\n], remove it. *) - val count_chars : char -> string -> int - (** Count number of times the character occurs in string. *) - val explode : string -> char list - (** Explode a string into a list of characters. *) - val map_chars : (char -> 'a) -> string -> 'a list - (** Explode string, then map function over the characters. *) - val implode : char list -> string - (** Join list of characters into a single string. *) - val spaces : int -> string - (** [spaces n] creates a string of n spaces. *) - val span : string -> string -> int - val cspan : string -> string -> int - (** [span str accept] returns the length in bytes of the initial - segment of [str] which contains only bytes in [accept]. - - [cspan str reject] returns the length in bytes of the initial - segment of [str] which contains only bytes {!i not} in [reject]. - - These work exactly like the C functions [strspn] and [strcspn]. *) - val unix2dos : string -> string - (** Convert string with ordinary Unix-style line-endings to - CRLF DOS-style line-endings. - - The same as {!String.replace} [str "\n" "\r\n"]. *) -end -(** Override the String module from stdlib. *) - module List : sig val length : 'a list -> int val hd : 'a list -> 'a @@ -194,11 +62,11 @@ module List : sig val sort_uniq : ('a -> 'a -> int) -> 'a list -> 'a list val merge : ('a -> 'a -> int) -> 'a list -> 'a list -> 'a list - val dropwhile : ('a -> bool) -> 'a list -> 'a list - (** [dropwhile f xs] drops leading elements from [xs] until + val drop_while : ('a -> bool) -> 'a list -> 'a list + (** [drop_while f xs] drops leading elements from [xs] until [f] returns false. *) - val takewhile : ('a -> bool) -> 'a list -> 'a list - (** [takewhile f xs] takes leading elements from [xs] until + val take_while : ('a -> bool) -> 'a list -> 'a list + (** [take_while f xs] takes leading elements from [xs] until [f] returns false. For any list [xs] and function [f], @@ -206,12 +74,21 @@ module List : sig val take : int -> 'a list -> 'a list (** [take n xs] returns the first [n] elements of [xs]. If [xs] is - shorter than [n], then it returns [xs]. Note it never fails - for any input. *) + shorter than [n], then it returns [xs]. [n] must be non-negative. + + @raise Invalid_argument if [n] is negative *) val drop : int -> 'a list -> 'a list (** [drop n xs] returns the suffix of [xs] after the first [n] elements. If [xs] is shorter than [n], then it returns the empty - list. Note it never fails for any input. *) + list. [n] must be non-negative. + + @raise Invalid_argument if [n] is negative *) + + val last : 'a list -> 'a + (** Return the last element in the list (analogous to {!List.hd} but + much less efficient). + + @raise Invalid_argument if the list is empty *) val filter_map : ('a -> 'b option) -> 'a list -> 'b list (** [filter_map f xs] applies [f] to each element of [xs]. If @@ -302,6 +179,144 @@ module List : sig end (** Override the List module from stdlib. *) +module Char : sig + type t = char + val chr : int -> char + val code : char -> int + val compare: t -> t -> int + val escaped : char -> string + val unsafe_chr : int -> char + + val lowercase_ascii : char -> char + val uppercase_ascii : char -> char + + val isspace : char -> bool + (** Return true if char is a whitespace character. *) + val isdigit : char -> bool + (** Return true if the character is a digit [[0-9]]. *) + val isxdigit : char -> bool + (** Return true if the character is a hex digit [[0-9a-fA-F]]. *) + val isalpha : char -> bool + (** Return true if the character is a US ASCII 7 bit alphabetic. *) + val isalnum : char -> bool + (** Return true if the character is a US ASCII 7 bit alphanumeric. *) + + val hexdigit : char -> int + (** Return the value of a hex digit. If the char is not in + the set [[0-9a-fA-F]] then this returns [-1]. *) + + val mem : char -> string -> bool + (** [mem c str] returns true if the byte [c] is contained in [str]. + + This is actually the same as {!String.contains} with the + parameters reversed. *) +end +(** Override the Char module from stdlib. *) + +module String : sig + type t = string + val compare: t -> t -> int + val concat : string -> string list -> string + val contains : string -> char -> bool + val contains_from : string -> int -> char -> bool + val escaped : string -> string + val get : string -> int -> char + val index : string -> char -> int + val index_from : string -> int -> char -> int + val iter : (char -> unit) -> string -> unit + val iteri : (int -> char -> unit) -> string -> unit + val map : (char -> char) -> string -> string + val length : string -> int + val make : int -> char -> string + val rcontains_from : string -> int -> char -> bool + val rindex : string -> char -> int + val rindex_from : string -> int -> char -> int + val sub : string -> int -> int -> string + val unsafe_get : string -> int -> char + + val lowercase_ascii : string -> string + val uppercase_ascii : string -> string + val capitalize_ascii : string -> string + + val is_prefix : string -> string -> bool + (** [is_prefix str prefix] returns true if [prefix] is a prefix of [str]. *) + val is_suffix : string -> string -> bool + (** [is_suffix str suffix] returns true if [suffix] is a suffix of [str]. *) + val find : string -> string -> int + (** [find str sub] searches for [sub] as a substring of [str]. If + found it returns the index. If not found, it returns [-1]. *) + val find_from : string -> int -> string -> int + (** [find_from str start sub] searches for [sub] as a substring of [str], + starting at index [start]. If found it returns the index. + If not found, it returns [-1]. *) + val replace : string -> string -> string -> string + (** [replace str s1 s2] replaces all instances of [s1] appearing in + [str] with [s2]. *) + val replace_char : string -> char -> char -> string + (** Replace character in string. *) + val break : int -> string -> string * string + (** [break n str] breaks a string at the nth byte, returning the + first and second parts. If [n] is beyond the end of the + string it returns [(str, "")]. *) + val split : string -> string -> string * string + (** [split sep str] splits [str] at the first occurrence of the + separator [sep], returning the part before and the part after. + If separator is not found, return the whole string and an + empty string. *) + val nsplit : ?max:int -> string -> string -> string list + (** [nsplit ?max sep str] splits [str] into multiple strings at each + separator [sep]. + + As with the Perl split function, you can give an optional + [?max] parameter to limit the number of strings returned. The + final element of the list will contain the remainder of the + input string. *) + val lines_split : string -> string list + (** [lines_split str] splits [str] into lines, keeping continuation + characters (i.e. [\] at the end of lines) into account. *) + val random8 : unit -> string + (** Return a string of 8 random printable characters. *) + val triml : ?test:(char -> bool) -> string -> string + (** Trim left. *) + val trimr : ?test:(char -> bool) -> string -> string + (** Trim right. *) + val trim : ?test:(char -> bool) -> string -> string + (** Trim left and right. *) + val chomp : string -> string + (** If the string ends with [\n], remove it. *) + val count_chars : char -> string -> int + (** Count number of times the character occurs in string. *) + val explode : string -> char list + (** Explode a string into a list of characters. *) + val map_chars : (char -> 'a) -> string -> 'a list + (** Explode string, then map function over the characters. *) + val implode : char list -> string + (** Join list of characters into a single string. *) + val spaces : int -> string + (** [spaces n] creates a string of n spaces. *) + val span : string -> string -> int + val cspan : string -> string -> int + (** [span str accept] returns the length in bytes of the initial + segment of [str] which contains only bytes in [accept]. + + [cspan str reject] returns the length in bytes of the initial + segment of [str] which contains only bytes {!i not} in [reject]. + + These work exactly like the C functions [strspn] and [strcspn]. *) + val unix2dos : string -> string + (** Convert string with ordinary Unix-style line-endings to + CRLF DOS-style line-endings. + + The same as {!String.replace} [str "\n" "\r\n"]. *) + val common_prefix : string -> string -> string + (** Return the longest common prefix of two strings. *) + val longest_common_prefix : string list -> string + (** Return the longest common prefix of all the strings in the list. + If the list is empty or there is no common prefix, [""] is + returned. If there is only one string in the list, it is returned. *) +end +(** Override the String module from stdlib. *) + val ( // ) : string -> string -> string (** Concatenate directory and filename. *) diff --git a/common/mlstdutils/std_utils_tests.ml b/common/mlstdutils/std_utils_tests.ml index aadab17..abe842b 100644 --- a/common/mlstdutils/std_utils_tests.ml +++ b/common/mlstdutils/std_utils_tests.ml @@ -190,6 +190,37 @@ let () = assert_equal_string "" (String.chomp "\n"); assert_equal_string "\n" (String.chomp "\n\n") (* only removes one *) +(* Test Std_utils.String.common_prefix, longest_common_prefix. *) +let () = + assert_equal_string "" (String.common_prefix "" ""); + assert_equal_string "" (String.common_prefix "" "abc"); + assert_equal_string "" (String.common_prefix "abc" ""); + assert_equal_string "" (String.common_prefix "hello" "world"); + assert_equal_string "abc" (String.common_prefix "abc" "abcde"); + assert_equal_string "abc" (String.common_prefix "abcde" "abc"); + assert_equal_string "abc" (String.common_prefix "abcde" "abcghi"); + assert_equal_string "" (String.longest_common_prefix []); + assert_equal_string "" (String.longest_common_prefix [""]); + assert_equal_string "abc" (String.longest_common_prefix ["abc"]); + assert_equal_string "" (String.longest_common_prefix [""; "abc"]); + assert_equal_string "" (String.longest_common_prefix ["abc"; ""]); + assert_equal_string "" (String.longest_common_prefix ["hello"; "world"]); + assert_equal_string "" + (String.longest_common_prefix ["hello"; "there"; "world"]); + assert_equal_string "abc" (String.longest_common_prefix ["abc"; "abcde"]); + assert_equal_string "abc" (String.longest_common_prefix ["abcde"; "abc"]); + assert_equal_string "abc" (String.longest_common_prefix ["abcde"; "abcghi"]); + assert_equal_string "abc" + (String.longest_common_prefix ["abcde"; "abcghi"; "abc123"]); + assert_equal_string "abc" + (String.longest_common_prefix ["abc"; "abcghi"; "abc123"]); + assert_equal_string "abc" + (String.longest_common_prefix ["abcde"; "abc"; "abc123"]); + assert_equal_string "abc" + (String.longest_common_prefix ["abcde"; "abcde"; "abc"]); + assert_equal_string "abc" + (String.longest_common_prefix ["abc"; "abc"; "abc"]) + (* Test Std_utils.which. *) let () = assert_nonempty_string (which "true"); @@ -216,3 +247,9 @@ let () = let () = assert_bool "List.same []" (List.same (List.make 0 "1")); assert_bool "List.same [1; 1; ...; 1]" (List.same (List.make 10 1)) + +(* Test List.last. *) +let () = + assert_equal_string "3" (List.last ["1"; "2"; "3"]); + assert_equal_string "1" (List.last ["1"]); + assert_raises (Invalid_argument "List.last") (fun () -> List.last []) diff --git a/contrib/visualize-alignment/tracetops.ml b/contrib/visualize-alignment/tracetops.ml index b838b30f3..102a03580 100755 --- a/contrib/visualize-alignment/tracetops.ml +++ b/contrib/visualize-alignment/tracetops.ml @@ -185,7 +185,7 @@ let ranges = loop i0 rest | (false, i1) :: rest -> let i1 = i1 - 1 in - let rest = List.dropwhile (function (v, _) -> not v) rest in + let rest = List.drop_while (function (v, _) -> not v) rest in (match rest with | [] -> [i0, i1] | (_, i2) :: rest -> (i0, i1) :: loop i2 rest) -- 2.47.1