From 3915d1efcdb1e9d10c8f6966acbe5c359d824ba1 Mon Sep 17 00:00:00 2001 From: Josh Stone Date: Mon, 6 Apr 2026 14:08:10 -0700 Subject: [PATCH] [CodeGen] Preserve big-endian trunc in concat_vectors A transform from `concat_vectors(trunc(scalar), undef)` to `scalar_to_vector(scalar)` is only equivalent for little-endian targets. On big-endian targets, the most significant bytes come first, so the transform would put the wider scalar's extra upper bytes ahead of the desired truncated bytes. This problem was seen on Rust s390x in [RHEL-147748]. [RHEL-147748]: https://redhat.atlassian.net/browse/RHEL-147748 Assisted-by: Claude Code --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 4 +- llvm/test/CodeGen/SystemZ/vec-trunc-to-i16.ll | 45 +++++++++++++++++++ 2 files changed, 48 insertions(+), 1 deletion(-) create mode 100644 llvm/test/CodeGen/SystemZ/vec-trunc-to-i16.ll diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 383e45c5ea3a8..5485ee86251a5 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -26513,9 +26513,11 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { // If the bitcast type isn't legal, it might be a trunc of a legal type; // look through the trunc so we can still do the transform: // concat_vectors(trunc(scalar), undef) -> scalar_to_vector(scalar) + // However, this is only equivalent on little-endian targets. 
if (Scalar->getOpcode() == ISD::TRUNCATE && !TLI.isTypeLegal(Scalar.getValueType()) && - TLI.isTypeLegal(Scalar->getOperand(0).getValueType())) + TLI.isTypeLegal(Scalar->getOperand(0).getValueType()) && + DAG.getDataLayout().isLittleEndian()) Scalar = Scalar->getOperand(0); EVT SclTy = Scalar.getValueType(); diff --git a/llvm/test/CodeGen/SystemZ/vec-trunc-to-i16.ll b/llvm/test/CodeGen/SystemZ/vec-trunc-to-i16.ll new file mode 100644 index 0000000000000..42d787d945145 --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/vec-trunc-to-i16.ll @@ -0,0 +1,45 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s + +; Test that truncated scalars use the correct vector insert instruction. +; On big-endian targets, concat_vectors should not skip truncates when +; creating scalar_to_vector, as the bytes would be in the wrong position. + +; This truncated i16 should use vlvgh (insert halfword), not vlvgf (insert fullword). +define <16 x i8> @test_concat_trunc_i16(i32 %x) { +; CHECK-LABEL: test_concat_trunc_i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vlvgh %v24, %r2, 0 +; CHECK-NEXT: br %r14 + %t = trunc i32 %x to i16 + %vec = bitcast i16 %t to <2 x i8> + %result = shufflevector <2 x i8> %vec, <2 x i8> poison, <16 x i32> + ret <16 x i8> %result +} + +; Test with a more complex shuffle pattern, reduced from a Rust bug report. 
+define fastcc void @test_shuffle_with_trunc() { +; CHECK-LABEL: test_shuffle_with_trunc: +; CHECK: # %bb.0: +; CHECK-NEXT: lh %r1, 0 +; CHECK-NEXT: l %r0, 0 +; CHECK-NEXT: vlvgh %v1, %r1, 0 +; CHECK-NEXT: larl %r1, .LCPI1_0 +; CHECK-NEXT: vl %v2, 0(%r1), 3 +; CHECK-NEXT: vlvgf %v0, %r0, 0 +; CHECK-NEXT: vperm %v0, %v0, %v1, %v2 +; CHECK-NEXT: vst %v0, 0, 3 +; CHECK-NEXT: br %r14 + %1 = load i32, ptr null, align 8 + %2 = load i16, ptr null, align 1 + br label %3 + +3: + %4 = bitcast i32 %1 to <4 x i8> + %5 = shufflevector <4 x i8> %4, <4 x i8> zeroinitializer, <16 x i32> + %6 = bitcast i16 %2 to <2 x i8> + %7 = shufflevector <2 x i8> %6, <2 x i8> zeroinitializer, <16 x i32> + %8 = shufflevector <16 x i8> %5, <16 x i8> %7, <16 x i32> + store <16 x i8> %8, ptr null, align 8 + ret void +}