From 96694ca8ed18690b79e07e3d15dec5310cf360a5 Mon Sep 17 00:00:00 2001 From: CentOS Sources Date: Tue, 15 Nov 2022 02:04:49 -0500 Subject: [PATCH] import glibc-2.34-40.el9 --- SOURCES/glibc-fedora-localedef.patch | 21 - SOURCES/glibc-rh2054789.patch | 27 + ...115828-1.patch => glibc-rh2085529-1.patch} | 6 +- ...115828-2.patch => glibc-rh2085529-2.patch} | 4 +- ...115828-3.patch => glibc-rh2085529-3.patch} | 0 SOURCES/glibc-rh2085529-4.patch | 38 + SOURCES/glibc-rh2096191-1.patch | 67 + SOURCES/glibc-rh2096191-2.patch | 941 +++++++ SOURCES/glibc-rh2115828-4.patch | 16 - SOURCES/glibc-upstream-2.34-108.patch | 32 + SOURCES/glibc-upstream-2.34-110.patch | 192 ++ SOURCES/glibc-upstream-2.34-111.patch | 60 + SOURCES/glibc-upstream-2.34-112.patch | 120 + SOURCES/glibc-upstream-2.34-113.patch | 139 + SOURCES/glibc-upstream-2.34-114.patch | 32 + SOURCES/glibc-upstream-2.34-117.patch | 104 + SOURCES/glibc-upstream-2.34-118.patch | 146 + SOURCES/glibc-upstream-2.34-119.patch | 177 ++ SOURCES/glibc-upstream-2.34-120.patch | 27 + SOURCES/glibc-upstream-2.34-121.patch | 122 + SOURCES/glibc-upstream-2.34-122.patch | 26 + SOURCES/glibc-upstream-2.34-123.patch | 56 + SOURCES/glibc-upstream-2.34-124.patch | 224 ++ SOURCES/glibc-upstream-2.34-125.patch | 345 +++ SOURCES/glibc-upstream-2.34-126.patch | 1038 +++++++ SOURCES/glibc-upstream-2.34-127.patch | 358 +++ SOURCES/glibc-upstream-2.34-128.patch | 24 + SOURCES/glibc-upstream-2.34-129.patch | 91 + SOURCES/glibc-upstream-2.34-130.patch | 42 + SOURCES/glibc-upstream-2.34-131.patch | 293 ++ SOURCES/glibc-upstream-2.34-132.patch | 356 +++ SOURCES/glibc-upstream-2.34-133.patch | 178 ++ SOURCES/glibc-upstream-2.34-134.patch | 20 + SOURCES/glibc-upstream-2.34-135.patch | 35 + SOURCES/glibc-upstream-2.34-136.patch | 2020 ++++++++++++++ SOURCES/glibc-upstream-2.34-137.patch | 241 ++ SOURCES/glibc-upstream-2.34-138.patch | 1115 ++++++++ SOURCES/glibc-upstream-2.34-139.patch | 221 ++ SOURCES/glibc-upstream-2.34-140.patch | 69 + SOURCES/glibc-upstream-2.34-141.patch | 390 +++ SOURCES/glibc-upstream-2.34-142.patch | 159 ++ SOURCES/glibc-upstream-2.34-143.patch | 254 ++ SOURCES/glibc-upstream-2.34-144.patch | 157 ++ SOURCES/glibc-upstream-2.34-145.patch | 123 + SOURCES/glibc-upstream-2.34-146.patch | 334 +++ SOURCES/glibc-upstream-2.34-147.patch | 107 + SOURCES/glibc-upstream-2.34-148.patch | 206 ++ SOURCES/glibc-upstream-2.34-149.patch | 715 +++++ SOURCES/glibc-upstream-2.34-150.patch | 454 +++ SOURCES/glibc-upstream-2.34-151.patch | 294 ++ SOURCES/glibc-upstream-2.34-152.patch | 314 +++ SOURCES/glibc-upstream-2.34-153.patch | 167 ++ SOURCES/glibc-upstream-2.34-154.patch | 434 +++ SOURCES/glibc-upstream-2.34-155.patch | 299 ++ SOURCES/glibc-upstream-2.34-156.patch | 1778 ++++++++++++ SOURCES/glibc-upstream-2.34-157.patch | 1042 +++++++ SOURCES/glibc-upstream-2.34-158.patch | 23 + SOURCES/glibc-upstream-2.34-159.patch | 27 + SOURCES/glibc-upstream-2.34-160.patch | 114 + SOURCES/glibc-upstream-2.34-162.patch | 242 ++ SOURCES/glibc-upstream-2.34-163.patch | 834 ++++++ SOURCES/glibc-upstream-2.34-164.patch | 23 + SOURCES/glibc-upstream-2.34-165.patch | 104 + SOURCES/glibc-upstream-2.34-166.patch | 35 + SOURCES/glibc-upstream-2.34-167.patch | 1446 ++++++++++ SOURCES/glibc-upstream-2.34-168.patch | 407 +++ SOURCES/glibc-upstream-2.34-169.patch | 87 + SOURCES/glibc-upstream-2.34-170.patch | 49 + SOURCES/glibc-upstream-2.34-171.patch | 377 +++ SOURCES/glibc-upstream-2.34-172.patch | 28 + SOURCES/glibc-upstream-2.34-173.patch | 254 ++ SOURCES/glibc-upstream-2.34-174.patch | 42 
+ SOURCES/glibc-upstream-2.34-175.patch | 653 +++++ SOURCES/glibc-upstream-2.34-176.patch | 497 ++++ SOURCES/glibc-upstream-2.34-177.patch | 40 + SOURCES/glibc-upstream-2.34-178.patch | 690 +++++ SOURCES/glibc-upstream-2.34-179.patch | 85 + SOURCES/glibc-upstream-2.34-180.patch | 48 + SOURCES/glibc-upstream-2.34-181.patch | 843 ++++++ SOURCES/glibc-upstream-2.34-182.patch | 131 + SOURCES/glibc-upstream-2.34-183.patch | 2423 +++++++++++++++++ SOURCES/glibc-upstream-2.34-184.patch | 104 + SOURCES/glibc-upstream-2.34-185.patch | 30 + SOURCES/glibc-upstream-2.34-186.patch | 384 +++ SOURCES/glibc-upstream-2.34-187.patch | 42 + SOURCES/glibc-upstream-2.34-188.patch | 39 + SOURCES/glibc-upstream-2.34-189.patch | 116 + SOURCES/glibc-upstream-2.34-190.patch | 189 ++ SOURCES/glibc-upstream-2.34-191.patch | 35 + SOURCES/glibc-upstream-2.34-192.patch | 27 + SOURCES/glibc-upstream-2.34-193.patch | 28 + SOURCES/glibc-upstream-2.34-194.patch | 337 +++ SOURCES/glibc-upstream-2.34-195.patch | 27 + SOURCES/glibc-upstream-2.34-196.patch | 27 + SOURCES/glibc-upstream-2.34-197.patch | 26 + SOURCES/glibc-upstream-2.34-198.patch | 21 + SOURCES/glibc-upstream-2.34-199.patch | 21 + SOURCES/glibc-upstream-2.34-200.patch | 29 + SOURCES/glibc-upstream-2.34-201.patch | 1789 ++++++++++++ SOURCES/glibc-upstream-2.34-202.patch | 1987 ++++++++++++++ SOURCES/glibc-upstream-2.34-203.patch | 29 + SOURCES/glibc-upstream-2.34-204.patch | 29 + SOURCES/glibc-upstream-2.34-205.patch | 451 +++ SOURCES/glibc-upstream-2.34-206.patch | 35 + SOURCES/glibc-upstream-2.34-207.patch | 719 +++++ SOURCES/glibc-upstream-2.34-208.patch | 29 + SOURCES/glibc-upstream-2.34-209.patch | 76 + SOURCES/glibc-upstream-2.34-210.patch | 71 + SOURCES/glibc-upstream-2.34-211.patch | 170 ++ SOURCES/glibc-upstream-2.34-212.patch | 256 ++ SOURCES/glibc-upstream-2.34-213.patch | 31 + SOURCES/glibc-upstream-2.34-214.patch | 22 + SOURCES/glibc-upstream-2.34-215.patch | 98 + SOURCES/glibc-upstream-2.34-216.patch | 32 + SOURCES/glibc-upstream-2.34-217.patch | 24 + SOURCES/glibc-upstream-2.34-218.patch | 367 +++ SOURCES/glibc-upstream-2.34-219.patch | 338 +++ SOURCES/glibc-upstream-2.34-220.patch | 143 + SOURCES/glibc-upstream-2.34-221.patch | 143 + SOURCES/glibc-upstream-2.34-222.patch | 164 ++ SOURCES/glibc-upstream-2.34-223.patch | 44 + SOURCES/glibc-upstream-2.34-224.patch | 157 ++ SOURCES/glibc-upstream-2.34-225.patch | 118 + SOURCES/glibc-upstream-2.34-226.patch | 139 + SOURCES/glibc-upstream-2.34-227.patch | 744 +++++ SOURCES/glibc-upstream-2.34-228.patch | 803 ++++++ SOURCES/glibc-upstream-2.34-229.patch | 902 ++++++ SOURCES/glibc-upstream-2.34-230.patch | 253 ++ SOURCES/glibc-upstream-2.34-231.patch | 956 +++++++ SOURCES/glibc-upstream-2.34-232.patch | 259 ++ SOURCES/glibc-upstream-2.34-233.patch | 865 ++++++ SOURCES/glibc-upstream-2.34-234.patch | 497 ++++ SOURCES/glibc-upstream-2.34-235.patch | 554 ++++ SOURCES/glibc-upstream-2.34-236.patch | 35 + SOURCES/glibc-upstream-2.34-237.patch | 751 +++++ SOURCES/glibc-upstream-2.34-238.patch | 120 + SOURCES/glibc-upstream-2.34-239.patch | 55 + SOURCES/glibc-upstream-2.34-240.patch | 70 + SOURCES/glibc-upstream-2.34-241.patch | 410 +++ SOURCES/glibc-upstream-2.34-242.patch | 399 +++ SOURCES/glibc-upstream-2.34-243.patch | 36 + SOURCES/glibc-upstream-2.34-244.patch | 439 +++ SOURCES/glibc-upstream-2.34-245.patch | 197 ++ SOURCES/glibc-upstream-2.34-246.patch | 31 + SOURCES/glibc-upstream-2.34-247.patch | 94 + SOURCES/glibc-upstream-2.34-248.patch | 93 + SOURCES/glibc-upstream-2.34-249.patch | 88 + 
SOURCES/glibc-upstream-2.34-250.patch | 121 + SOURCES/glibc-upstream-2.34-251.patch | 150 + SOURCES/glibc-upstream-2.34-252.patch | 510 ++++ SOURCES/glibc-upstream-2.34-253.patch | 350 +++ SOURCES/glibc-upstream-2.34-254.patch | 301 ++ SOURCES/glibc-upstream-2.34-255.patch | 105 + SOURCES/glibc-upstream-2.34-256.patch | 39 + SOURCES/glibc-upstream-2.34-257.patch | 51 + SOURCES/glibc-upstream-2.34-258.patch | 737 +++++ SOURCES/glibc-upstream-2.34-259.patch | 30 + SOURCES/glibc-upstream-2.34-260.patch | 34 + SOURCES/glibc-upstream-2.34-261.patch | 56 + SOURCES/glibc-upstream-2.34-262.patch | 28 + SOURCES/glibc-upstream-2.34-263.patch | 31 + SOURCES/glibc-upstream-2.34-264.patch | 34 + SOURCES/glibc-upstream-2.34-265.patch | 34 + SOURCES/glibc-upstream-2.34-266.patch | 47 + SOURCES/glibc-upstream-2.34-267.patch | 21 + ...50.patch => glibc-upstream-2.34-268.patch} | 3 +- SOURCES/glibc-upstream-2.34-269.patch | 126 + SOURCES/glibc-upstream-2.34-270.patch | 83 + SOURCES/glibc-upstream-2.34-271.patch | 84 + SOURCES/glibc-upstream-2.34-272.patch | 37 + SOURCES/glibc-upstream-2.34-273.patch | 74 + SOURCES/glibc-upstream-2.34-274.patch | 27 + SOURCES/glibc-upstream-2.34-275.patch | 25 + SOURCES/glibc-upstream-2.34-276.patch | 29 + SOURCES/glibc-upstream-2.34-277.patch | 457 ++++ SOURCES/glibc-upstream-2.34-278.patch | 460 ++++ SOURCES/glibc-upstream-2.34-279.patch | 33 + SOURCES/glibc-upstream-2.34-280.patch | 356 +++ SOURCES/glibc-upstream-2.34-281.patch | 385 +++ SOURCES/glibc-upstream-2.34-282.patch | 90 + SOURCES/glibc-upstream-2.34-283.patch | 696 +++++ SOURCES/glibc-upstream-2.34-284.patch | 624 +++++ SOURCES/glibc-upstream-2.34-285.patch | 645 +++++ SOURCES/glibc-upstream-2.34-286.patch | 323 +++ SOURCES/glibc-upstream-2.34-287.patch | 130 + SOURCES/glibc-upstream-2.34-288.patch | 33 + SOURCES/glibc-upstream-2.34-289.patch | 41 + SOURCES/glibc-upstream-2.34-290.patch | 56 + SOURCES/glibc-upstream-2.34-291.patch | 38 + SOURCES/glibc-upstream-2.34-292.patch | 54 + SOURCES/glibc-upstream-2.34-293.patch | 88 + SOURCES/glibc-upstream-2.34-294.patch | 27 + SOURCES/glibc-upstream-2.34-295.patch | 28 + SOURCES/glibc-upstream-2.34-296.patch | 56 + SOURCES/glibc-upstream-2.34-297.patch | 25 + SOURCES/glibc-upstream-2.34-298.patch | 124 + SOURCES/glibc-upstream-2.34-299.patch | 163 ++ SOURCES/glibc-upstream-2.34-300.patch | 38 + SOURCES/glibc-upstream-2.34-301.patch | 28 + SOURCES/glibc-upstream-2.34-302.patch | 44 + SPECS/glibc.spec | 481 +++- 201 files changed, 51976 insertions(+), 60 deletions(-) delete mode 100644 SOURCES/glibc-fedora-localedef.patch create mode 100644 SOURCES/glibc-rh2054789.patch rename SOURCES/{glibc-rh2115828-1.patch => glibc-rh2085529-1.patch} (99%) rename SOURCES/{glibc-rh2115828-2.patch => glibc-rh2085529-2.patch} (98%) rename SOURCES/{glibc-rh2115828-3.patch => glibc-rh2085529-3.patch} (100%) create mode 100644 SOURCES/glibc-rh2085529-4.patch create mode 100644 SOURCES/glibc-rh2096191-1.patch create mode 100644 SOURCES/glibc-rh2096191-2.patch delete mode 100644 SOURCES/glibc-rh2115828-4.patch create mode 100644 SOURCES/glibc-upstream-2.34-108.patch create mode 100644 SOURCES/glibc-upstream-2.34-110.patch create mode 100644 SOURCES/glibc-upstream-2.34-111.patch create mode 100644 SOURCES/glibc-upstream-2.34-112.patch create mode 100644 SOURCES/glibc-upstream-2.34-113.patch create mode 100644 SOURCES/glibc-upstream-2.34-114.patch create mode 100644 SOURCES/glibc-upstream-2.34-117.patch create mode 100644 SOURCES/glibc-upstream-2.34-118.patch create mode 100644 
SOURCES/glibc-upstream-2.34-119.patch create mode 100644 SOURCES/glibc-upstream-2.34-120.patch create mode 100644 SOURCES/glibc-upstream-2.34-121.patch create mode 100644 SOURCES/glibc-upstream-2.34-122.patch create mode 100644 SOURCES/glibc-upstream-2.34-123.patch create mode 100644 SOURCES/glibc-upstream-2.34-124.patch create mode 100644 SOURCES/glibc-upstream-2.34-125.patch create mode 100644 SOURCES/glibc-upstream-2.34-126.patch create mode 100644 SOURCES/glibc-upstream-2.34-127.patch create mode 100644 SOURCES/glibc-upstream-2.34-128.patch create mode 100644 SOURCES/glibc-upstream-2.34-129.patch create mode 100644 SOURCES/glibc-upstream-2.34-130.patch create mode 100644 SOURCES/glibc-upstream-2.34-131.patch create mode 100644 SOURCES/glibc-upstream-2.34-132.patch create mode 100644 SOURCES/glibc-upstream-2.34-133.patch create mode 100644 SOURCES/glibc-upstream-2.34-134.patch create mode 100644 SOURCES/glibc-upstream-2.34-135.patch create mode 100644 SOURCES/glibc-upstream-2.34-136.patch create mode 100644 SOURCES/glibc-upstream-2.34-137.patch create mode 100644 SOURCES/glibc-upstream-2.34-138.patch create mode 100644 SOURCES/glibc-upstream-2.34-139.patch create mode 100644 SOURCES/glibc-upstream-2.34-140.patch create mode 100644 SOURCES/glibc-upstream-2.34-141.patch create mode 100644 SOURCES/glibc-upstream-2.34-142.patch create mode 100644 SOURCES/glibc-upstream-2.34-143.patch create mode 100644 SOURCES/glibc-upstream-2.34-144.patch create mode 100644 SOURCES/glibc-upstream-2.34-145.patch create mode 100644 SOURCES/glibc-upstream-2.34-146.patch create mode 100644 SOURCES/glibc-upstream-2.34-147.patch create mode 100644 SOURCES/glibc-upstream-2.34-148.patch create mode 100644 SOURCES/glibc-upstream-2.34-149.patch create mode 100644 SOURCES/glibc-upstream-2.34-150.patch create mode 100644 SOURCES/glibc-upstream-2.34-151.patch create mode 100644 SOURCES/glibc-upstream-2.34-152.patch create mode 100644 SOURCES/glibc-upstream-2.34-153.patch create mode 100644 SOURCES/glibc-upstream-2.34-154.patch create mode 100644 SOURCES/glibc-upstream-2.34-155.patch create mode 100644 SOURCES/glibc-upstream-2.34-156.patch create mode 100644 SOURCES/glibc-upstream-2.34-157.patch create mode 100644 SOURCES/glibc-upstream-2.34-158.patch create mode 100644 SOURCES/glibc-upstream-2.34-159.patch create mode 100644 SOURCES/glibc-upstream-2.34-160.patch create mode 100644 SOURCES/glibc-upstream-2.34-162.patch create mode 100644 SOURCES/glibc-upstream-2.34-163.patch create mode 100644 SOURCES/glibc-upstream-2.34-164.patch create mode 100644 SOURCES/glibc-upstream-2.34-165.patch create mode 100644 SOURCES/glibc-upstream-2.34-166.patch create mode 100644 SOURCES/glibc-upstream-2.34-167.patch create mode 100644 SOURCES/glibc-upstream-2.34-168.patch create mode 100644 SOURCES/glibc-upstream-2.34-169.patch create mode 100644 SOURCES/glibc-upstream-2.34-170.patch create mode 100644 SOURCES/glibc-upstream-2.34-171.patch create mode 100644 SOURCES/glibc-upstream-2.34-172.patch create mode 100644 SOURCES/glibc-upstream-2.34-173.patch create mode 100644 SOURCES/glibc-upstream-2.34-174.patch create mode 100644 SOURCES/glibc-upstream-2.34-175.patch create mode 100644 SOURCES/glibc-upstream-2.34-176.patch create mode 100644 SOURCES/glibc-upstream-2.34-177.patch create mode 100644 SOURCES/glibc-upstream-2.34-178.patch create mode 100644 SOURCES/glibc-upstream-2.34-179.patch create mode 100644 SOURCES/glibc-upstream-2.34-180.patch create mode 100644 SOURCES/glibc-upstream-2.34-181.patch create mode 100644 
SOURCES/glibc-upstream-2.34-182.patch create mode 100644 SOURCES/glibc-upstream-2.34-183.patch create mode 100644 SOURCES/glibc-upstream-2.34-184.patch create mode 100644 SOURCES/glibc-upstream-2.34-185.patch create mode 100644 SOURCES/glibc-upstream-2.34-186.patch create mode 100644 SOURCES/glibc-upstream-2.34-187.patch create mode 100644 SOURCES/glibc-upstream-2.34-188.patch create mode 100644 SOURCES/glibc-upstream-2.34-189.patch create mode 100644 SOURCES/glibc-upstream-2.34-190.patch create mode 100644 SOURCES/glibc-upstream-2.34-191.patch create mode 100644 SOURCES/glibc-upstream-2.34-192.patch create mode 100644 SOURCES/glibc-upstream-2.34-193.patch create mode 100644 SOURCES/glibc-upstream-2.34-194.patch create mode 100644 SOURCES/glibc-upstream-2.34-195.patch create mode 100644 SOURCES/glibc-upstream-2.34-196.patch create mode 100644 SOURCES/glibc-upstream-2.34-197.patch create mode 100644 SOURCES/glibc-upstream-2.34-198.patch create mode 100644 SOURCES/glibc-upstream-2.34-199.patch create mode 100644 SOURCES/glibc-upstream-2.34-200.patch create mode 100644 SOURCES/glibc-upstream-2.34-201.patch create mode 100644 SOURCES/glibc-upstream-2.34-202.patch create mode 100644 SOURCES/glibc-upstream-2.34-203.patch create mode 100644 SOURCES/glibc-upstream-2.34-204.patch create mode 100644 SOURCES/glibc-upstream-2.34-205.patch create mode 100644 SOURCES/glibc-upstream-2.34-206.patch create mode 100644 SOURCES/glibc-upstream-2.34-207.patch create mode 100644 SOURCES/glibc-upstream-2.34-208.patch create mode 100644 SOURCES/glibc-upstream-2.34-209.patch create mode 100644 SOURCES/glibc-upstream-2.34-210.patch create mode 100644 SOURCES/glibc-upstream-2.34-211.patch create mode 100644 SOURCES/glibc-upstream-2.34-212.patch create mode 100644 SOURCES/glibc-upstream-2.34-213.patch create mode 100644 SOURCES/glibc-upstream-2.34-214.patch create mode 100644 SOURCES/glibc-upstream-2.34-215.patch create mode 100644 SOURCES/glibc-upstream-2.34-216.patch create mode 100644 SOURCES/glibc-upstream-2.34-217.patch create mode 100644 SOURCES/glibc-upstream-2.34-218.patch create mode 100644 SOURCES/glibc-upstream-2.34-219.patch create mode 100644 SOURCES/glibc-upstream-2.34-220.patch create mode 100644 SOURCES/glibc-upstream-2.34-221.patch create mode 100644 SOURCES/glibc-upstream-2.34-222.patch create mode 100644 SOURCES/glibc-upstream-2.34-223.patch create mode 100644 SOURCES/glibc-upstream-2.34-224.patch create mode 100644 SOURCES/glibc-upstream-2.34-225.patch create mode 100644 SOURCES/glibc-upstream-2.34-226.patch create mode 100644 SOURCES/glibc-upstream-2.34-227.patch create mode 100644 SOURCES/glibc-upstream-2.34-228.patch create mode 100644 SOURCES/glibc-upstream-2.34-229.patch create mode 100644 SOURCES/glibc-upstream-2.34-230.patch create mode 100644 SOURCES/glibc-upstream-2.34-231.patch create mode 100644 SOURCES/glibc-upstream-2.34-232.patch create mode 100644 SOURCES/glibc-upstream-2.34-233.patch create mode 100644 SOURCES/glibc-upstream-2.34-234.patch create mode 100644 SOURCES/glibc-upstream-2.34-235.patch create mode 100644 SOURCES/glibc-upstream-2.34-236.patch create mode 100644 SOURCES/glibc-upstream-2.34-237.patch create mode 100644 SOURCES/glibc-upstream-2.34-238.patch create mode 100644 SOURCES/glibc-upstream-2.34-239.patch create mode 100644 SOURCES/glibc-upstream-2.34-240.patch create mode 100644 SOURCES/glibc-upstream-2.34-241.patch create mode 100644 SOURCES/glibc-upstream-2.34-242.patch create mode 100644 SOURCES/glibc-upstream-2.34-243.patch create mode 100644 
SOURCES/glibc-upstream-2.34-244.patch create mode 100644 SOURCES/glibc-upstream-2.34-245.patch create mode 100644 SOURCES/glibc-upstream-2.34-246.patch create mode 100644 SOURCES/glibc-upstream-2.34-247.patch create mode 100644 SOURCES/glibc-upstream-2.34-248.patch create mode 100644 SOURCES/glibc-upstream-2.34-249.patch create mode 100644 SOURCES/glibc-upstream-2.34-250.patch create mode 100644 SOURCES/glibc-upstream-2.34-251.patch create mode 100644 SOURCES/glibc-upstream-2.34-252.patch create mode 100644 SOURCES/glibc-upstream-2.34-253.patch create mode 100644 SOURCES/glibc-upstream-2.34-254.patch create mode 100644 SOURCES/glibc-upstream-2.34-255.patch create mode 100644 SOURCES/glibc-upstream-2.34-256.patch create mode 100644 SOURCES/glibc-upstream-2.34-257.patch create mode 100644 SOURCES/glibc-upstream-2.34-258.patch create mode 100644 SOURCES/glibc-upstream-2.34-259.patch create mode 100644 SOURCES/glibc-upstream-2.34-260.patch create mode 100644 SOURCES/glibc-upstream-2.34-261.patch create mode 100644 SOURCES/glibc-upstream-2.34-262.patch create mode 100644 SOURCES/glibc-upstream-2.34-263.patch create mode 100644 SOURCES/glibc-upstream-2.34-264.patch create mode 100644 SOURCES/glibc-upstream-2.34-265.patch create mode 100644 SOURCES/glibc-upstream-2.34-266.patch create mode 100644 SOURCES/glibc-upstream-2.34-267.patch rename SOURCES/{glibc-rh2095450.patch => glibc-upstream-2.34-268.patch} (91%) create mode 100644 SOURCES/glibc-upstream-2.34-269.patch create mode 100644 SOURCES/glibc-upstream-2.34-270.patch create mode 100644 SOURCES/glibc-upstream-2.34-271.patch create mode 100644 SOURCES/glibc-upstream-2.34-272.patch create mode 100644 SOURCES/glibc-upstream-2.34-273.patch create mode 100644 SOURCES/glibc-upstream-2.34-274.patch create mode 100644 SOURCES/glibc-upstream-2.34-275.patch create mode 100644 SOURCES/glibc-upstream-2.34-276.patch create mode 100644 SOURCES/glibc-upstream-2.34-277.patch create mode 100644 SOURCES/glibc-upstream-2.34-278.patch create mode 100644 SOURCES/glibc-upstream-2.34-279.patch create mode 100644 SOURCES/glibc-upstream-2.34-280.patch create mode 100644 SOURCES/glibc-upstream-2.34-281.patch create mode 100644 SOURCES/glibc-upstream-2.34-282.patch create mode 100644 SOURCES/glibc-upstream-2.34-283.patch create mode 100644 SOURCES/glibc-upstream-2.34-284.patch create mode 100644 SOURCES/glibc-upstream-2.34-285.patch create mode 100644 SOURCES/glibc-upstream-2.34-286.patch create mode 100644 SOURCES/glibc-upstream-2.34-287.patch create mode 100644 SOURCES/glibc-upstream-2.34-288.patch create mode 100644 SOURCES/glibc-upstream-2.34-289.patch create mode 100644 SOURCES/glibc-upstream-2.34-290.patch create mode 100644 SOURCES/glibc-upstream-2.34-291.patch create mode 100644 SOURCES/glibc-upstream-2.34-292.patch create mode 100644 SOURCES/glibc-upstream-2.34-293.patch create mode 100644 SOURCES/glibc-upstream-2.34-294.patch create mode 100644 SOURCES/glibc-upstream-2.34-295.patch create mode 100644 SOURCES/glibc-upstream-2.34-296.patch create mode 100644 SOURCES/glibc-upstream-2.34-297.patch create mode 100644 SOURCES/glibc-upstream-2.34-298.patch create mode 100644 SOURCES/glibc-upstream-2.34-299.patch create mode 100644 SOURCES/glibc-upstream-2.34-300.patch create mode 100644 SOURCES/glibc-upstream-2.34-301.patch create mode 100644 SOURCES/glibc-upstream-2.34-302.patch diff --git a/SOURCES/glibc-fedora-localedef.patch b/SOURCES/glibc-fedora-localedef.patch deleted file mode 100644 index 515611a..0000000 --- a/SOURCES/glibc-fedora-localedef.patch +++ 
/dev/null @@ -1,21 +0,0 @@ -Short description: Fedora-specific glibc install locale changes. -Author(s): Fedora glibc team -Origin: PATCH -Upstream status: not-needed - -The Fedora glibc build and install does not need the normal install -behaviour which updates the locale archive. The Fedora install phase -in the spec file of the rpm will handle this manually. - -diff --git a/localedata/Makefile b/localedata/Makefile -index 0eea396ad86da956..54caabda33728207 100644 ---- a/localedata/Makefile -+++ b/localedata/Makefile -@@ -413,6 +413,7 @@ define build-one-locale - echo -n '...'; \ - input=`echo $$locale | sed 's/\([^.]*\)[^@]*\(.*\)/\1\2/'`; \ - $(LOCALEDEF) $$flags --alias-file=../intl/locale.alias \ -+ --no-archive \ - -i locales/$$input -f charmaps/$$charset \ - $(addprefix --prefix=,$(install_root)) $$locale \ - && echo ' done'; diff --git a/SOURCES/glibc-rh2054789.patch b/SOURCES/glibc-rh2054789.patch new file mode 100644 index 0000000..68fff2f --- /dev/null +++ b/SOURCES/glibc-rh2054789.patch @@ -0,0 +1,27 @@ +commit ea89d5bbd9e5e514b606045d909e6ab87d851c88 +Author: Arjun Shankar +Date: Thu Feb 24 21:43:09 2022 +0100 + + localedef: Handle symbolic links when generating locale-archive + + Whenever locale data for any locale included symbolic links, localedef + would throw the error "incomplete set of locale files" and exclude it + from the generated locale archive. This commit fixes that. + + Co-authored-by: Florian Weimer + + Reviewed-by: Carlos O'Donell + +diff --git a/locale/programs/locarchive.c b/locale/programs/locarchive.c +index f38e835c52e4a967..d79278b6ed7340bf 100644 +--- a/locale/programs/locarchive.c ++++ b/locale/programs/locarchive.c +@@ -1391,7 +1391,7 @@ add_locales_to_archive (size_t nlist, char *list[], bool replace) + { + char fullname[fnamelen + 2 * strlen (d->d_name) + 7]; + +- if (d_type == DT_UNKNOWN) ++ if (d_type == DT_UNKNOWN || d_type == DT_LNK) + { + strcpy (stpcpy (stpcpy (fullname, fname), "/"), + d->d_name); diff --git a/SOURCES/glibc-rh2115828-1.patch b/SOURCES/glibc-rh2085529-1.patch similarity index 99% rename from SOURCES/glibc-rh2115828-1.patch rename to SOURCES/glibc-rh2085529-1.patch index bc6782b..5e49605 100644 --- a/SOURCES/glibc-rh2115828-1.patch +++ b/SOURCES/glibc-rh2085529-1.patch @@ -16,7 +16,7 @@ Date: Thu Dec 9 09:49:32 2021 +0100 Reviewed-by: Szabolcs Nagy diff --git a/manual/threads.texi b/manual/threads.texi -index 06b6b277a1228af1..ab44a92ca0f5a6a5 100644 +index 7f166bfa87e88c36..4869f69d2ceed255 100644 --- a/manual/threads.texi +++ b/manual/threads.texi @@ -629,6 +629,8 @@ the standard. @@ -115,7 +115,7 @@ index 06b6b277a1228af1..ab44a92ca0f5a6a5 100644 @c pthread_atfork @c pthread_attr_destroy diff --git a/sysdeps/nptl/dl-tls_init_tp.c b/sysdeps/nptl/dl-tls_init_tp.c -index b39dfbff2c6678d5..4a73927f805abf94 100644 +index 23aa4cfc0b784dfc..0f5280a75d546d2f 100644 --- a/sysdeps/nptl/dl-tls_init_tp.c +++ b/sysdeps/nptl/dl-tls_init_tp.c @@ -22,6 +22,7 @@ @@ -163,7 +163,7 @@ index b39dfbff2c6678d5..4a73927f805abf94 100644 /* Set initial thread's stack block from 0 up to __libc_stack_end. 
diff --git a/sysdeps/unix/sysv/linux/Makefile b/sysdeps/unix/sysv/linux/Makefile -index 0657f4003e7116c6..856a9d58cef6a879 100644 +index 5c772f69d1b1f1f1..9b7e214219943531 100644 --- a/sysdeps/unix/sysv/linux/Makefile +++ b/sysdeps/unix/sysv/linux/Makefile @@ -110,7 +110,8 @@ sysdep_headers += sys/mount.h sys/acct.h \ diff --git a/SOURCES/glibc-rh2115828-2.patch b/SOURCES/glibc-rh2085529-2.patch similarity index 98% rename from SOURCES/glibc-rh2115828-2.patch rename to SOURCES/glibc-rh2085529-2.patch index f3863d7..2203635 100644 --- a/SOURCES/glibc-rh2115828-2.patch +++ b/SOURCES/glibc-rh2085529-2.patch @@ -11,7 +11,7 @@ Date: Wed Feb 2 22:37:20 2022 +0100 Reviewed-by: Carlos O'Donell diff --git a/manual/threads.texi b/manual/threads.texi -index ab44a92ca0f5a6a5..4b9fc946916190ef 100644 +index 4869f69d2ceed255..48fd562923800b34 100644 --- a/manual/threads.texi +++ b/manual/threads.texi @@ -1004,7 +1004,7 @@ The manual for the @code{rseq} system call can be found @@ -24,7 +24,7 @@ index ab44a92ca0f5a6a5..4b9fc946916190ef 100644 This variable contains the offset between the thread pointer (as defined by @code{__builtin_thread_pointer} or the thread pointer register for diff --git a/sysdeps/nptl/dl-tls_init_tp.c b/sysdeps/nptl/dl-tls_init_tp.c -index 4a73927f805abf94..86e87c7da4e88d66 100644 +index 0f5280a75d546d2f..d5f2587f1348441c 100644 --- a/sysdeps/nptl/dl-tls_init_tp.c +++ b/sysdeps/nptl/dl-tls_init_tp.c @@ -46,7 +46,7 @@ rtld_mutex_dummy (pthread_mutex_t *lock) diff --git a/SOURCES/glibc-rh2115828-3.patch b/SOURCES/glibc-rh2085529-3.patch similarity index 100% rename from SOURCES/glibc-rh2115828-3.patch rename to SOURCES/glibc-rh2085529-3.patch diff --git a/SOURCES/glibc-rh2085529-4.patch b/SOURCES/glibc-rh2085529-4.patch new file mode 100644 index 0000000..54c97e9 --- /dev/null +++ b/SOURCES/glibc-rh2085529-4.patch @@ -0,0 +1,38 @@ +Revert glibc-rh2024347-13.patch. Enable rseq by default. + +diff --git a/manual/tunables.texi b/manual/tunables.texi +index f559c44dcec4624b..28ff502990c2a10f 100644 +--- a/manual/tunables.texi ++++ b/manual/tunables.texi +@@ -425,13 +425,11 @@ The value is measured in bytes. The default is @samp{41943040} + @end deftp + + @deftp Tunable glibc.pthread.rseq +-The @code{glibc.pthread.rseq} tunable can be set to @samp{1}, to enable +-restartable sequences support. @Theglibc{} uses this to optimize the +-@code{sched_getcpu} function. +- +-The default is @samp{0}, which means that applications can perform +-restartable sequences registration, but @code{sched_getcpu} is not +-accelerated. ++The @code{glibc.pthread.rseq} tunable can be set to @samp{0}, to disable ++restartable sequences support in @theglibc{}. This enables applications ++to perform direct restartable sequence registration with the kernel. ++The default is @samp{1}, which means that @theglibc{} performs ++registration on behalf of the application. + + Restartable sequences are a Linux-specific extension. 
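A note on the tunable text restored above: the effect of glibc.pthread.rseq is observable from application code. The following is a minimal sketch, not part of the patch, assuming the <sys/rseq.h> interface (__rseq_offset, __rseq_size) that the glibc-rh2085529 series backports and that the manual/threads.texi hunk above documents:

    #define _GNU_SOURCE
    #include <sched.h>
    #include <stddef.h>
    #include <stdio.h>
    #include <sys/rseq.h>

    int
    main (void)
    {
      /* __rseq_size is nonzero only when glibc registered the rseq area,
         i.e. with glibc.pthread.rseq=1 (the default after this revert).  */
      if (__rseq_size > 0)
        printf ("rseq registered by glibc, area at thread pointer %+td\n",
                __rseq_offset);
      else
        puts ("rseq not registered (tunable set to 0, or no kernel support)");

      /* sched_getcpu is the function this registration accelerates.  */
      printf ("running on CPU %d\n", sched_getcpu ());
      return 0;
    }

Running it under GLIBC_TUNABLES=glibc.pthread.rseq=0 exercises the disabled path; with the new default of 1, no environment setting is needed.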
+ @end deftp +diff --git a/sysdeps/nptl/dl-tunables.list b/sysdeps/nptl/dl-tunables.list +index df2a39ce01858d3b..d24f4be0d08ba407 100644 +--- a/sysdeps/nptl/dl-tunables.list ++++ b/sysdeps/nptl/dl-tunables.list +@@ -31,7 +31,7 @@ glibc { + type: INT_32 + minval: 0 + maxval: 1 +- default: 0 ++ default: 1 + } + } + } diff --git a/SOURCES/glibc-rh2096191-1.patch b/SOURCES/glibc-rh2096191-1.patch new file mode 100644 index 0000000..b1341ef --- /dev/null +++ b/SOURCES/glibc-rh2096191-1.patch @@ -0,0 +1,67 @@ +commit 62a321b12d0e397af88fa422db65079332f971dc +Author: Florian Weimer +Date: Fri Jun 24 18:16:41 2022 +0200 + + support: Change non-address output format of support_format_dns_packet + + It makes sense to include the owner name (LHS) and record type in the + output, so that they can be checked for correctness. + + Reviewed-by: Carlos O'Donell + +diff --git a/support/support_format_dns_packet.c b/support/support_format_dns_packet.c +index 1f8e9ca172a06f4f..c3dff0e019801904 100644 +--- a/support/support_format_dns_packet.c ++++ b/support/support_format_dns_packet.c +@@ -101,6 +101,17 @@ extract_name (struct in_buffer full, struct in_buffer *in, struct dname *value) + return true; + } + ++static void ++extract_name_data (struct in_buffer full, struct in_buffer *rdata, ++ const struct dname *owner, const char *typename, FILE *out) ++{ ++ struct dname name; ++ if (extract_name (full, rdata, &name)) ++ fprintf (out, "data: %s %s %s\n", owner->name, typename, name.name); ++ else ++ fprintf (out, "error: malformed CNAME/PTR record\n"); ++} ++ + char * + support_format_dns_packet (const unsigned char *buffer, size_t length) + { +@@ -206,14 +217,11 @@ support_format_dns_packet (const unsigned char *buffer, size_t length) + } + break; + case T_CNAME: ++ extract_name_data (full, &rdata, &rname, "CNAME", mem.out); ++ break; + case T_PTR: +- { +- struct dname name; +- if (extract_name (full, &rdata, &name)) +- fprintf (mem.out, "name: %s\n", name.name); +- else +- fprintf (mem.out, "error: malformed CNAME/PTR record\n"); +- } ++ extract_name_data (full, &rdata, &rname, "PTR", mem.out); ++ break; + } + } + +diff --git a/support/tst-support_format_dns_packet.c b/support/tst-support_format_dns_packet.c +index 03ff59457e3bdde8..5596db1785009557 100644 +--- a/support/tst-support_format_dns_packet.c ++++ b/support/tst-support_format_dns_packet.c +@@ -85,8 +85,8 @@ test_multiple_cnames (void) + "\xc0\x00\x02\x01"; + check_packet (packet, sizeof (packet) - 1, __func__, + "name: www.example\n" +- "name: www1.example\n" +- "name: www2.example\n" ++ "data: www.example CNAME www1.example\n" ++ "data: www1.example CNAME www2.example\n" + "address: 192.0.2.1\n"); + } + diff --git a/SOURCES/glibc-rh2096191-2.patch b/SOURCES/glibc-rh2096191-2.patch new file mode 100644 index 0000000..1cbd0a4 --- /dev/null +++ b/SOURCES/glibc-rh2096191-2.patch @@ -0,0 +1,941 @@ +commit f282cdbe7f436c75864e5640a409a10485e9abb2 +Author: Florian Weimer +Date: Fri Jun 24 18:16:41 2022 +0200 + + resolv: Implement no-aaaa stub resolver option + + Reviewed-by: Carlos O'Donell + +diff --git a/resolv/Makefile b/resolv/Makefile +index 59e599535c7aa6eb..e8269dcb5bcf216b 100644 +--- a/resolv/Makefile ++++ b/resolv/Makefile +@@ -51,6 +51,7 @@ routines := \ + nss_dns_functions \ + res-close \ + res-name-checking \ ++ res-noaaaa \ + res-state \ + res_context_hostalias \ + res_enable_icmp \ +@@ -93,6 +94,7 @@ tests += \ + tst-resolv-binary \ + tst-resolv-edns \ + tst-resolv-network \ ++ tst-resolv-noaaaa \ + tst-resolv-nondecimal \ + 
tst-resolv-res_init-multi \ + tst-resolv-search \ +@@ -256,6 +258,7 @@ $(objpfx)tst-resolv-res_init-multi: $(objpfx)libresolv.so \ + $(shared-thread-library) + $(objpfx)tst-resolv-res_init-thread: $(objpfx)libresolv.so \ + $(shared-thread-library) ++$(objpfx)tst-resolv-noaaaa: $(objpfx)libresolv.so $(shared-thread-library) + $(objpfx)tst-resolv-nondecimal: $(objpfx)libresolv.so $(shared-thread-library) + $(objpfx)tst-resolv-qtypes: $(objpfx)libresolv.so $(shared-thread-library) + $(objpfx)tst-resolv-rotate: $(objpfx)libresolv.so $(shared-thread-library) +diff --git a/resolv/nss_dns/dns-host.c b/resolv/nss_dns/dns-host.c +index 7248ade18db5ba47..6e83fca1c5b1f98c 100644 +--- a/resolv/nss_dns/dns-host.c ++++ b/resolv/nss_dns/dns-host.c +@@ -125,6 +125,14 @@ static enum nss_status gaih_getanswer (const querybuf *answer1, int anslen1, + char *buffer, size_t buflen, + int *errnop, int *h_errnop, + int32_t *ttlp); ++static enum nss_status gaih_getanswer_noaaaa (const querybuf *answer1, ++ int anslen1, ++ const char *qname, ++ struct gaih_addrtuple **pat, ++ char *buffer, size_t buflen, ++ int *errnop, int *h_errnop, ++ int32_t *ttlp); ++ + + static enum nss_status gethostbyname3_context (struct resolv_context *ctx, + const char *name, int af, +@@ -370,17 +378,31 @@ _nss_dns_gethostbyname4_r (const char *name, struct gaih_addrtuple **pat, + int resplen2 = 0; + int ans2p_malloced = 0; + ++ + int olderr = errno; +- int n = __res_context_search (ctx, name, C_IN, T_QUERY_A_AND_AAAA, ++ int n; ++ ++ if ((ctx->resp->options & RES_NOAAAA) == 0) ++ { ++ n = __res_context_search (ctx, name, C_IN, T_QUERY_A_AND_AAAA, + host_buffer.buf->buf, 2048, &host_buffer.ptr, + &ans2p, &nans2p, &resplen2, &ans2p_malloced); +- if (n >= 0) +- { +- status = gaih_getanswer (host_buffer.buf, n, (const querybuf *) ans2p, +- resplen2, name, pat, buffer, buflen, +- errnop, herrnop, ttlp); ++ if (n >= 0) ++ status = gaih_getanswer (host_buffer.buf, n, (const querybuf *) ans2p, ++ resplen2, name, pat, buffer, buflen, ++ errnop, herrnop, ttlp); + } + else ++ { ++ n = __res_context_search (ctx, name, C_IN, T_A, ++ host_buffer.buf->buf, 2048, NULL, ++ NULL, NULL, NULL, NULL); ++ if (n >= 0) ++ status = gaih_getanswer_noaaaa (host_buffer.buf, n, ++ name, pat, buffer, buflen, ++ errnop, herrnop, ttlp); ++ } ++ if (n < 0) + { + switch (errno) + { +@@ -1388,3 +1410,21 @@ gaih_getanswer (const querybuf *answer1, int anslen1, const querybuf *answer2, + + return status; + } ++ ++/* Variant of gaih_getanswer without a second (AAAA) response. */ ++static enum nss_status ++gaih_getanswer_noaaaa (const querybuf *answer1, int anslen1, const char *qname, ++ struct gaih_addrtuple **pat, ++ char *buffer, size_t buflen, ++ int *errnop, int *h_errnop, int32_t *ttlp) ++{ ++ int first = 1; ++ ++ enum nss_status status = NSS_STATUS_NOTFOUND; ++ if (anslen1 > 0) ++ status = gaih_getanswer_slice (answer1, anslen1, qname, ++ &pat, &buffer, &buflen, ++ errnop, h_errnop, ttlp, ++ &first); ++ return status; ++} +diff --git a/resolv/res-noaaaa.c b/resolv/res-noaaaa.c +new file mode 100644 +index 0000000000000000..4ba197664a86aed7 +--- /dev/null ++++ b/resolv/res-noaaaa.c +@@ -0,0 +1,143 @@ ++/* Implement suppression of AAAA queries. ++ Copyright (C) 2022 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. 
++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++#include ++#include ++#include ++#include ++ ++/* Returns true if the question type at P matches EXPECTED, and the ++ class is IN. */ ++static bool ++qtype_matches (const unsigned char *p, int expected) ++{ ++ /* This assumes that T_A/C_IN constants are less than 256, which ++ they are. */ ++ return p[0] == 0 && p[1] == expected && p[2] == 0 && p[3] == C_IN; ++} ++ ++/* Handle RES_NOAAAA translation of AAAA queries. To produce a Name ++ Error (NXDOMAIN) response for domain names that do not exist, it is ++ still necessary to send a query. Using question type A is a ++ conservative choice. In the returned answer, it is necessary to ++ switch back the question type to AAAA. */ ++bool ++__res_handle_no_aaaa (struct resolv_context *ctx, ++ const unsigned char *buf, int buflen, ++ unsigned char *ans, int anssiz, int *result) ++{ ++ /* AAAA mode is not active, or the query looks invalid (will not be ++ able to be parsed). */ ++ if ((ctx->resp->options & RES_NOAAAA) == 0 ++ || buflen <= sizeof (HEADER)) ++ return false; ++ ++ /* The replacement A query is produced here. */ ++ struct ++ { ++ HEADER header; ++ unsigned char question[NS_MAXCDNAME + 4]; ++ } replacement; ++ memcpy (&replacement.header, buf, sizeof (replacement.header)); ++ ++ if (replacement.header.qr ++ || replacement.header.opcode != 0 ++ || replacement.header.rcode != 0 ++ || ntohs (replacement.header.qdcount) != 1 ++ || ntohs (replacement.header.ancount) != 0 ++ || ntohs (replacement.header.nscount) != 0) ++ /* Not a well-formed question. Let the core resolver code produce ++ the proper error. */ ++ return false; ++ ++ /* Disable EDNS0. */ ++ replacement.header.arcount = htons (0); ++ ++ /* Extract the QNAME. */ ++ int ret = __ns_name_unpack (buf, buf + buflen, buf + sizeof (HEADER), ++ replacement.question, NS_MAXCDNAME); ++ if (ret < 0) ++ /* Format error. */ ++ return false; ++ ++ /* Compute the end of the question name. */ ++ const unsigned char *after_question = buf + sizeof (HEADER) + ret; ++ ++ /* Check that we are dealing with an AAAA query. */ ++ if (buf + buflen - after_question < 4 ++ || !qtype_matches (after_question, T_AAAA)) ++ return false; ++ ++ /* Find the place to store the type/class data in the replacement ++ query. */ ++ after_question = replacement.question; ++ /* This cannot fail because __ns_name_unpack above produced a valid ++ domain name. */ ++ (void) __ns_name_skip (&after_question, &replacement.question[NS_MAXCDNAME]); ++ unsigned char *start_of_query = (unsigned char *) &replacement; ++ const unsigned char *end_of_query = after_question + 4; ++ ++ /* Produce an A/IN query. */ ++ { ++ unsigned char *p = (unsigned char *) after_question; ++ p[0] = 0; ++ p[1] = T_A; ++ p[2] = 0; ++ p[3] = C_IN; ++ } ++ ++ /* Clear the output buffer, to avoid reading undefined data when ++ rewriting the result from A to AAAA. 
*/ ++ memset (ans, 0, anssiz); ++ ++ /* Always perform the message translation, independent of the error ++ code. */ ++ ret = __res_context_send (ctx, ++ start_of_query, end_of_query - start_of_query, ++ NULL, 0, ans, anssiz, ++ NULL, NULL, NULL, NULL, NULL); ++ ++ /* Patch in the AAAA question type if there is room and the A query ++ type was received. */ ++ after_question = ans + sizeof (HEADER); ++ if (__ns_name_skip (&after_question, ans + anssiz) == 0 ++ && ans + anssiz - after_question >= 4 ++ && qtype_matches (after_question, T_A)) ++ { ++ ((unsigned char *) after_question)[1] = T_AAAA; ++ ++ /* Create an aligned copy of the header. Hide all data except ++ the question from the response. Put back the header. There is ++ no need to change the response code. The zero answer count turns ++ a positive response with data into a no-data response. */ ++ memcpy (&replacement.header, ans, sizeof (replacement.header)); ++ replacement.header.ancount = htons (0); ++ replacement.header.nscount = htons (0); ++ replacement.header.arcount = htons (0); ++ memcpy (ans, &replacement.header, sizeof (replacement.header)); ++ ++ /* Truncate the reply. */ ++ if (ret <= 0) ++ *result = ret; ++ else ++ *result = after_question - ans + 4; ++ } ++ ++ return true; ++} +diff --git a/resolv/res_debug.c b/resolv/res_debug.c +index 030df0aa90c9f34f..b0fe69b1aa5186a0 100644 +--- a/resolv/res_debug.c ++++ b/resolv/res_debug.c +@@ -613,6 +613,7 @@ p_option(u_long option) { + case RES_NOTLDQUERY: return "no-tld-query"; + case RES_NORELOAD: return "no-reload"; + case RES_TRUSTAD: return "trust-ad"; ++ case RES_NOAAAA: return "no-aaaa"; + /* XXX nonreentrant */ + default: sprintf(nbuf, "?0x%lx?", (u_long)option); + return (nbuf); +diff --git a/resolv/res_init.c b/resolv/res_init.c +index 6b2936eda9618ac9..8bde915903565f60 100644 +--- a/resolv/res_init.c ++++ b/resolv/res_init.c +@@ -695,6 +695,7 @@ res_setoptions (struct resolv_conf_parser *parser, const char *options) + { STRnLEN ("no-reload"), 0, RES_NORELOAD }, + { STRnLEN ("use-vc"), 0, RES_USEVC }, + { STRnLEN ("trust-ad"), 0, RES_TRUSTAD }, ++ { STRnLEN ("no-aaaa"), 0, RES_NOAAAA }, + }; + #define noptions (sizeof (options) / sizeof (options[0])) + for (int i = 0; i < noptions; ++i) +diff --git a/resolv/res_query.c b/resolv/res_query.c +index 75b0e5f2f7b51eb2..2f3c28cfc8c0d832 100644 +--- a/resolv/res_query.c ++++ b/resolv/res_query.c +@@ -204,10 +204,26 @@ __res_context_query (struct resolv_context *ctx, const char *name, + free (buf); + return (n); + } +- assert (answerp == NULL || (void *) *answerp == (void *) answer); +- n = __res_context_send (ctx, query1, nquery1, query2, nquery2, answer, +- anslen, answerp, answerp2, nanswerp2, resplen2, +- answerp2_malloced); ++ ++ /* Suppress AAAA lookups if required. __res_handle_no_aaaa ++ checks RES_NOAAAA first, so avoids parsing the ++ just-generated query packet in most cases. nss_dns avoids ++ using T_QUERY_A_AND_AAAA in RES_NOAAAA mode, so there is no ++ need to handle it here. */ ++ if (type == T_AAAA && __res_handle_no_aaaa (ctx, query1, nquery1, ++ answer, anslen, &n)) ++ /* There must be no second query for AAAA queries. The code ++ below is still needed to translate NODATA responses. 
*/ ++ assert (query2 == NULL); ++ else ++ { ++ assert (answerp == NULL || (void *) *answerp == (void *) answer); ++ n = __res_context_send (ctx, query1, nquery1, query2, nquery2, ++ answer, anslen, ++ answerp, answerp2, nanswerp2, resplen2, ++ answerp2_malloced); ++ } ++ + if (use_malloc) + free (buf); + if (n < 0) { +diff --git a/resolv/res_send.c b/resolv/res_send.c +index 9f86f5fe47194887..8ac6a307b40fa2ca 100644 +--- a/resolv/res_send.c ++++ b/resolv/res_send.c +@@ -438,8 +438,13 @@ context_send_common (struct resolv_context *ctx, + RES_SET_H_ERRNO (&_res, NETDB_INTERNAL); + return -1; + } +- int result = __res_context_send (ctx, buf, buflen, NULL, 0, ans, anssiz, +- NULL, NULL, NULL, NULL, NULL); ++ ++ int result; ++ if (__res_handle_no_aaaa (ctx, buf, buflen, ans, anssiz, &result)) ++ return result; ++ ++ result = __res_context_send (ctx, buf, buflen, NULL, 0, ans, anssiz, ++ NULL, NULL, NULL, NULL, NULL); + __resolv_context_put (ctx); + return result; + } +diff --git a/resolv/resolv-internal.h b/resolv/resolv-internal.h +index 216e47ed42076b72..8ab02fc9e648d30f 100644 +--- a/resolv/resolv-internal.h ++++ b/resolv/resolv-internal.h +@@ -78,6 +78,14 @@ int __res_context_send (struct resolv_context *, const unsigned char *, int, + int *, int *, int *); + libc_hidden_proto (__res_context_send) + ++/* Return true if the query has been handled in RES_NOAAAA mode. For ++ that, RES_NOAAAA must be active, and the question type must be AAAA. ++ The caller is expected to return *RESULT as the return value. */ ++bool __res_handle_no_aaaa (struct resolv_context *ctx, ++ const unsigned char *buf, int buflen, ++ unsigned char *ans, int anssiz, int *result) ++ attribute_hidden; ++ + /* Internal function similar to res_hostalias. */ + const char *__res_context_hostalias (struct resolv_context *, + const char *, char *, size_t); +diff --git a/resolv/resolv.h b/resolv/resolv.h +index e7c8d44645912ddf..3a79ffea57a6916f 100644 +--- a/resolv/resolv.h ++++ b/resolv/resolv.h +@@ -132,6 +132,7 @@ struct res_sym { + as a TLD. */ + #define RES_NORELOAD 0x02000000 /* No automatic configuration reload. */ + #define RES_TRUSTAD 0x04000000 /* Request AD bit, keep it in responses. */ ++#define RES_NOAAAA 0x08000000 /* Suppress AAAA queries. */ + + #define RES_DEFAULT (RES_RECURSE|RES_DEFNAMES|RES_DNSRCH) + +diff --git a/resolv/tst-resolv-noaaaa.c b/resolv/tst-resolv-noaaaa.c +new file mode 100644 +index 0000000000000000..56b25f88a58ad286 +--- /dev/null ++++ b/resolv/tst-resolv-noaaaa.c +@@ -0,0 +1,533 @@ ++/* Test the RES_NOAAAA resolver option. ++ Copyright (C) 2022 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++/* Used to keep track of the number of queries. 
*/ ++static volatile unsigned int queries; ++ ++static void ++response (const struct resolv_response_context *ctx, ++ struct resolv_response_builder *b, ++ const char *qname, uint16_t qclass, uint16_t qtype) ++{ ++ /* Each test should only send one query. */ ++ ++queries; ++ TEST_COMPARE (queries, 1); ++ ++ /* AAAA queries are supposed to be disabled. */ ++ TEST_VERIFY (qtype != T_AAAA); ++ TEST_COMPARE (qclass, C_IN); ++ ++ /* The only other query type besides A is PTR. */ ++ if (qtype != T_A) ++ TEST_COMPARE (qtype, T_PTR); ++ ++ int an, ns, ar; ++ char *tail; ++ if (sscanf (qname, "an%d.ns%d.ar%d.%ms", &an, &ns, &ar, &tail) != 4) ++ FAIL_EXIT1 ("invalid QNAME: %s\n", qname); ++ TEST_COMPARE_STRING (tail, "example"); ++ free (tail); ++ ++ if (an < 0 || ns < 0 || ar < 0) ++ { ++ struct resolv_response_flags flags = { .rcode = NXDOMAIN, }; ++ resolv_response_init (b, flags); ++ resolv_response_add_question (b, qname, qclass, qtype); ++ return; ++ } ++ ++ struct resolv_response_flags flags = {}; ++ resolv_response_init (b, flags); ++ resolv_response_add_question (b, qname, qclass, qtype); ++ ++ resolv_response_section (b, ns_s_an); ++ for (int i = 0; i < an; ++i) ++ { ++ resolv_response_open_record (b, qname, qclass, qtype, 60); ++ switch (qtype) ++ { ++ case T_A: ++ char ipv4[4] = {192, 0, 2, i + 1}; ++ resolv_response_add_data (b, &ipv4, sizeof (ipv4)); ++ break; ++ ++ case T_PTR: ++ char *name = xasprintf ("ptr-%d", i); ++ resolv_response_add_name (b, name); ++ free (name); ++ break; ++ } ++ resolv_response_close_record (b); ++ } ++ ++ resolv_response_section (b, ns_s_ns); ++ for (int i = 0; i < ns; ++i) ++ { ++ resolv_response_open_record (b, qname, qclass, T_NS, 60); ++ char *name = xasprintf ("ns%d.example.net", i); ++ resolv_response_add_name (b, name); ++ free (name); ++ resolv_response_close_record (b); ++ } ++ ++ resolv_response_section (b, ns_s_ar); ++ int addr = 1; ++ for (int i = 0; i < ns; ++i) ++ { ++ char *name = xasprintf ("ns%d.example.net", i); ++ for (int j = 0; j < ar; ++j) ++ { ++ resolv_response_open_record (b, name, qclass, T_A, 60); ++ char ipv4[4] = {192, 0, 2, addr}; ++ resolv_response_add_data (b, &ipv4, sizeof (ipv4)); ++ resolv_response_close_record (b); ++ ++ resolv_response_open_record (b, name, qclass, T_AAAA, 60); ++ char ipv6[16] ++ = {0x20, 0x01, 0xd, 0xb8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, addr}; ++ resolv_response_add_data (b, &ipv6, sizeof (ipv6)); ++ resolv_response_close_record (b); ++ ++ ++addr; ++ } ++ free (name); ++ } ++} ++ ++/* Number of modes. Lowest bit encodes *n* function vs implicit _res ++ argument. The mode numbers themselves are arbitrary. */ ++enum { mode_count = 8 }; ++ ++/* res_send-like modes do not perform error translation. 
*/ ++enum { first_send_mode = 6 }; ++ ++static int ++libresolv_query (unsigned int mode, const char *qname, uint16_t qtype, ++ unsigned char *buf, size_t buflen) ++{ ++ int saved_errno = errno; ++ ++ TEST_VERIFY_EXIT (mode < mode_count); ++ ++ switch (mode) ++ { ++ case 0: ++ return res_query (qname, C_IN, qtype, buf, buflen); ++ case 1: ++ return res_nquery (&_res, qname, C_IN, qtype, buf, buflen); ++ case 2: ++ return res_search (qname, C_IN, qtype, buf, buflen); ++ case 3: ++ return res_nsearch (&_res, qname, C_IN, qtype, buf, buflen); ++ case 4: ++ return res_querydomain (qname, "", C_IN, qtype, buf, buflen); ++ case 5: ++ return res_nquerydomain (&_res, qname, "", C_IN, qtype, buf, buflen); ++ case 6: ++ { ++ unsigned char querybuf[512]; ++ int ret = res_mkquery (QUERY, qname, C_IN, qtype, ++ NULL, 0, NULL, querybuf, sizeof (querybuf)); ++ TEST_VERIFY_EXIT (ret > 0); ++ errno = saved_errno; ++ return res_send (querybuf, ret, buf, buflen); ++ } ++ case 7: ++ { ++ unsigned char querybuf[512]; ++ int ret = res_nmkquery (&_res, QUERY, qname, C_IN, qtype, ++ NULL, 0, NULL, querybuf, sizeof (querybuf)); ++ TEST_VERIFY_EXIT (ret > 0); ++ errno = saved_errno; ++ return res_nsend (&_res, querybuf, ret, buf, buflen); ++ } ++ } ++ __builtin_unreachable (); ++} ++ ++static int ++do_test (void) ++{ ++ struct resolv_test *obj = resolv_test_start ++ ((struct resolv_redirect_config) ++ { ++ .response_callback = response ++ }); ++ ++ _res.options |= RES_NOAAAA; ++ ++ check_hostent ("an1.ns2.ar1.example", ++ gethostbyname ("an1.ns2.ar1.example"), ++ "name: an1.ns2.ar1.example\n" ++ "address: 192.0.2.1\n"); ++ queries = 0; ++ check_hostent ("an0.ns2.ar1.example", ++ gethostbyname ("an0.ns2.ar1.example"), ++ "error: NO_ADDRESS\n"); ++ queries = 0; ++ check_hostent ("an-1.ns2.ar1.example", ++ gethostbyname ("an-1.ns2.ar1.example"), ++ "error: HOST_NOT_FOUND\n"); ++ queries = 0; ++ ++ check_hostent ("an1.ns2.ar1.example AF_INET", ++ gethostbyname2 ("an1.ns2.ar1.example", AF_INET), ++ "name: an1.ns2.ar1.example\n" ++ "address: 192.0.2.1\n"); ++ queries = 0; ++ check_hostent ("an0.ns2.ar1.example AF_INET", ++ gethostbyname2 ("an0.ns2.ar1.example", AF_INET), ++ "error: NO_ADDRESS\n"); ++ queries = 0; ++ check_hostent ("an-1.ns2.ar1.example AF_INET", ++ gethostbyname2 ("an-1.ns2.ar1.example", AF_INET), ++ "error: HOST_NOT_FOUND\n"); ++ queries = 0; ++ ++ check_hostent ("an1.ns2.ar1.example AF_INET6", ++ gethostbyname2 ("an1.ns2.ar1.example", AF_INET6), ++ "error: NO_ADDRESS\n"); ++ queries = 0; ++ check_hostent ("an0.ns2.ar1.example AF_INET6", ++ gethostbyname2 ("an0.ns2.ar1.example", AF_INET6), ++ "error: NO_ADDRESS\n"); ++ queries = 0; ++ check_hostent ("an-1.ns2.ar1.example AF_INET6", ++ gethostbyname2 ("an-1.ns2.ar1.example", AF_INET6), ++ "error: HOST_NOT_FOUND\n"); ++ queries = 0; ++ ++ /* Multiple addresses. */ ++ check_hostent ("an2.ns0.ar0.example", ++ gethostbyname ("an2.ns0.ar0.example"), ++ "name: an2.ns0.ar0.example\n" ++ "address: 192.0.2.1\n" ++ "address: 192.0.2.2\n"); ++ queries = 0; ++ check_hostent ("an2.ns0.ar0.example AF_INET6", ++ gethostbyname2 ("an2.ns0.ar0.example", AF_INET6), ++ "error: NO_ADDRESS\n"); ++ queries = 0; ++ ++ /* getaddrinfo checks with one address. 
*/ ++ struct addrinfo *ai; ++ int ret; ++ ret = getaddrinfo ("an1.ns2.ar1.example", "80", ++ &(struct addrinfo) ++ { ++ .ai_family = AF_INET, ++ .ai_socktype = SOCK_STREAM, ++ }, &ai); ++ check_addrinfo ("an1.ns2.ar1.example (AF_INET)", ai, ret, ++ "address: STREAM/TCP 192.0.2.1 80\n"); ++ freeaddrinfo (ai); ++ queries = 0; ++ ret = getaddrinfo ("an1.ns2.ar1.example", "80", ++ &(struct addrinfo) ++ { ++ .ai_family = AF_INET6, ++ .ai_socktype = SOCK_STREAM, ++ }, &ai); ++ check_addrinfo ("an1.ns2.ar1.example (AF_INET6)", ai, ret, ++ "error: No address associated with hostname\n"); ++ queries = 0; ++ ret = getaddrinfo ("an1.ns2.ar1.example", "80", ++ &(struct addrinfo) ++ { ++ .ai_family = AF_UNSPEC, ++ .ai_socktype = SOCK_STREAM, ++ }, &ai); ++ check_addrinfo ("an1.ns2.ar1.example (AF_UNSPEC)", ai, ret, ++ "address: STREAM/TCP 192.0.2.1 80\n"); ++ freeaddrinfo (ai); ++ queries = 0; ++ ++ /* getaddrinfo checks with three addresses. */ ++ ret = getaddrinfo ("an3.ns2.ar1.example", "80", ++ &(struct addrinfo) ++ { ++ .ai_family = AF_INET, ++ .ai_socktype = SOCK_STREAM, ++ }, &ai); ++ check_addrinfo ("an3.ns2.ar1.example (AF_INET)", ai, ret, ++ "address: STREAM/TCP 192.0.2.1 80\n" ++ "address: STREAM/TCP 192.0.2.2 80\n" ++ "address: STREAM/TCP 192.0.2.3 80\n"); ++ freeaddrinfo (ai); ++ queries = 0; ++ ret = getaddrinfo ("an3.ns2.ar1.example", "80", ++ &(struct addrinfo) ++ { ++ .ai_family = AF_INET6, ++ .ai_socktype = SOCK_STREAM, ++ }, &ai); ++ check_addrinfo ("an3.ns2.ar1.example (AF_INET6)", ai, ret, ++ "error: No address associated with hostname\n"); ++ queries = 0; ++ ret = getaddrinfo ("an3.ns2.ar1.example", "80", ++ &(struct addrinfo) ++ { ++ .ai_family = AF_UNSPEC, ++ .ai_socktype = SOCK_STREAM, ++ }, &ai); ++ check_addrinfo ("an3.ns2.ar1.example (AF_UNSPEC)", ai, ret, ++ "address: STREAM/TCP 192.0.2.1 80\n" ++ "address: STREAM/TCP 192.0.2.2 80\n" ++ "address: STREAM/TCP 192.0.2.3 80\n"); ++ freeaddrinfo (ai); ++ queries = 0; ++ ++ /* getaddrinfo checks with no address. */ ++ ret = getaddrinfo ("an0.ns2.ar1.example", "80", ++ &(struct addrinfo) ++ { ++ .ai_family = AF_INET, ++ .ai_socktype = SOCK_STREAM, ++ }, &ai); ++ check_addrinfo ("an0.ns2.ar1.example (AF_INET)", ai, ret, ++ "error: No address associated with hostname\n"); ++ queries = 0; ++ ret = getaddrinfo ("an0.ns2.ar1.example", "80", ++ &(struct addrinfo) ++ { ++ .ai_family = AF_INET6, ++ .ai_socktype = SOCK_STREAM, ++ }, &ai); ++ check_addrinfo ("an0.ns2.ar1.example (AF_INET6)", ai, ret, ++ "error: No address associated with hostname\n"); ++ queries = 0; ++ ret = getaddrinfo ("an0.ns2.ar1.example", "80", ++ &(struct addrinfo) ++ { ++ .ai_family = AF_UNSPEC, ++ .ai_socktype = SOCK_STREAM, ++ }, &ai); ++ check_addrinfo ("an-1.ns2.ar1.example (AF_UNSPEC)", ai, ret, ++ "error: No address associated with hostname\n"); ++ queries = 0; ++ ++ /* getaddrinfo checks with NXDOMAIN. 
*/ ++ ret = getaddrinfo ("an-1.ns2.ar1.example", "80", ++ &(struct addrinfo) ++ { ++ .ai_family = AF_INET, ++ .ai_socktype = SOCK_STREAM, ++ }, &ai); ++ check_addrinfo ("an-1.ns2.ar1.example (AF_INET)", ai, ret, ++ "error: Name or service not known\n"); ++ queries = 0; ++ ret = getaddrinfo ("an-1.ns2.ar1.example", "80", ++ &(struct addrinfo) ++ { ++ .ai_family = AF_INET6, ++ .ai_socktype = SOCK_STREAM, ++ }, &ai); ++ check_addrinfo ("an-1.ns2.ar1.example (AF_INET6)", ai, ret, ++ "error: Name or service not known\n"); ++ queries = 0; ++ ret = getaddrinfo ("an-1.ns2.ar1.example", "80", ++ &(struct addrinfo) ++ { ++ .ai_family = AF_UNSPEC, ++ .ai_socktype = SOCK_STREAM, ++ }, &ai); ++ check_addrinfo ("an-1.ns2.ar1.example (AF_UNSPEC)", ai, ret, ++ "error: Name or service not known\n"); ++ queries = 0; ++ ++ for (unsigned int mode = 0; mode < mode_count; ++mode) ++ { ++ unsigned char *buf; ++ int ret; ++ ++ /* Response for A. */ ++ buf = malloc (512); ++ ret = libresolv_query (mode, "an1.ns2.ar1.example", T_A, buf, 512); ++ TEST_VERIFY_EXIT (ret > 0); ++ check_dns_packet ("an1.ns2.ar1.example A", buf, ret, ++ "name: an1.ns2.ar1.example\n" ++ "address: 192.0.2.1\n"); ++ free (buf); ++ queries = 0; ++ ++ /* NODATA response for A. */ ++ buf = malloc (512); ++ errno = 0; ++ ret = libresolv_query (mode, "an0.ns2.ar1.example", T_A, buf, 512); ++ if (mode < first_send_mode) ++ { ++ TEST_COMPARE (ret, -1); ++ TEST_COMPARE (errno, 0); ++ TEST_COMPARE (h_errno, NO_ADDRESS); ++ } ++ else ++ { ++ TEST_VERIFY_EXIT (ret > 0); ++ TEST_COMPARE (((HEADER *)buf)->rcode, 0); ++ check_dns_packet ("an1.ns2.ar1.example A", buf, ret, ++ "name: an0.ns2.ar1.example\n"); ++ } ++ free (buf); ++ queries = 0; ++ ++ /* NXDOMAIN response for A. */ ++ buf = malloc (512); ++ errno = 0; ++ ret = libresolv_query (mode, "an-1.ns2.ar1.example", T_A, buf, 512); ++ if (mode < first_send_mode) ++ { ++ TEST_COMPARE (ret, -1); ++ TEST_COMPARE (errno, 0); ++ TEST_COMPARE (h_errno, HOST_NOT_FOUND); ++ } ++ else ++ { ++ TEST_VERIFY_EXIT (ret > 0); ++ TEST_COMPARE (((HEADER *)buf)->rcode, NXDOMAIN); ++ check_dns_packet ("an1.ns2.ar1.example A", buf, ret, ++ "name: an-1.ns2.ar1.example\n"); ++ } ++ free (buf); ++ queries = 0; ++ ++ /* Response for PTR. */ ++ buf = malloc (512); ++ ret = libresolv_query (mode, "an1.ns2.ar1.example", T_PTR, buf, 512); ++ TEST_VERIFY_EXIT (ret > 0); ++ check_dns_packet ("an1.ns2.ar1.example PTR", buf, ret, ++ "name: an1.ns2.ar1.example\n" ++ "data: an1.ns2.ar1.example PTR ptr-0\n"); ++ free (buf); ++ queries = 0; ++ ++ /* NODATA response for PTR. */ ++ buf = malloc (512); ++ errno = 0; ++ ret = libresolv_query (mode, "an0.ns2.ar1.example", T_PTR, buf, 512); ++ if (mode < first_send_mode) ++ { ++ TEST_COMPARE (ret, -1); ++ TEST_COMPARE (errno, 0); ++ TEST_COMPARE (h_errno, NO_ADDRESS); ++ } ++ else ++ { ++ TEST_VERIFY_EXIT (ret > 0); ++ TEST_COMPARE (((HEADER *)buf)->rcode, 0); ++ check_dns_packet ("an1.ns2.ar1.example PTR", buf, ret, ++ "name: an0.ns2.ar1.example\n"); ++ } ++ free (buf); ++ queries = 0; ++ ++ /* NXDOMAIN response for PTR. 
*/ ++ buf = malloc (512); ++ errno = 0; ++ ret = libresolv_query (mode, "an-1.ns2.ar1.example", T_PTR, buf, 512); ++ if (mode < first_send_mode) ++ { ++ TEST_COMPARE (ret, -1); ++ TEST_COMPARE (errno, 0); ++ TEST_COMPARE (h_errno, HOST_NOT_FOUND); ++ } ++ else ++ { ++ TEST_VERIFY_EXIT (ret > 0); ++ TEST_COMPARE (((HEADER *)buf)->rcode, NXDOMAIN); ++ check_dns_packet ("an1.ns2.ar1.example PTR", buf, ret, ++ "name: an-1.ns2.ar1.example\n"); ++ } ++ free (buf); ++ queries = 0; ++ ++ /* NODATA response for AAAA. */ ++ buf = malloc (512); ++ errno = 0; ++ ret = libresolv_query (mode, "an1.ns2.ar1.example", T_AAAA, buf, 512); ++ if (mode < first_send_mode) ++ { ++ TEST_COMPARE (ret, -1); ++ TEST_COMPARE (errno, 0); ++ TEST_COMPARE (h_errno, NO_ADDRESS); ++ } ++ else ++ { ++ TEST_VERIFY_EXIT (ret > 0); ++ TEST_COMPARE (((HEADER *)buf)->rcode, 0); ++ check_dns_packet ("an1.ns2.ar1.example A", buf, ret, ++ "name: an1.ns2.ar1.example\n"); ++ } ++ free (buf); ++ queries = 0; ++ ++ /* NODATA response for AAAA (original is already NODATA). */ ++ buf = malloc (512); ++ errno = 0; ++ ret = libresolv_query (mode, "an0.ns2.ar1.example", T_AAAA, buf, 512); ++ if (mode < first_send_mode) ++ { ++ TEST_COMPARE (ret, -1); ++ TEST_COMPARE (errno, 0); ++ TEST_COMPARE (h_errno, NO_ADDRESS); ++ } ++ else ++ { ++ TEST_VERIFY_EXIT (ret > 0); ++ TEST_COMPARE (((HEADER *)buf)->rcode, 0); ++ check_dns_packet ("an0.ns2.ar1.example A", buf, ret, ++ "name: an0.ns2.ar1.example\n"); ++ } ++ free (buf); ++ queries = 0; ++ ++ /* NXDOMAIN response. */ ++ buf = malloc (512); ++ errno = 0; ++ ret = libresolv_query (mode, "an-1.ns2.ar1.example", T_AAAA, buf, 512); ++ if (mode < first_send_mode) ++ { ++ TEST_COMPARE (ret, -1); ++ TEST_COMPARE (errno, 0); ++ TEST_COMPARE (h_errno, HOST_NOT_FOUND); ++ } ++ else ++ { ++ TEST_VERIFY_EXIT (ret > 0); ++ TEST_COMPARE (((HEADER *)buf)->rcode, NXDOMAIN); ++ check_dns_packet ("an-1.ns2.ar1.example A", buf, ret, ++ "name: an-1.ns2.ar1.example\n"); ++ } ++ free (buf); ++ queries = 0; ++ } ++ ++ resolv_test_end (obj); ++ ++ return 0; ++} ++ ++#include +diff --git a/resolv/tst-resolv-res_init-skeleton.c b/resolv/tst-resolv-res_init-skeleton.c +index c87596762fcb23b1..28ed9c2eb150532d 100644 +--- a/resolv/tst-resolv-res_init-skeleton.c ++++ b/resolv/tst-resolv-res_init-skeleton.c +@@ -128,6 +128,7 @@ print_resp (FILE *fp, res_state resp) + print_option_flag (fp, &options, RES_NOTLDQUERY, "no-tld-query"); + print_option_flag (fp, &options, RES_NORELOAD, "no-reload"); + print_option_flag (fp, &options, RES_TRUSTAD, "trust-ad"); ++ print_option_flag (fp, &options, RES_NOAAAA, "no-aaaa"); + fputc ('\n', fp); + if (options != 0) + fprintf (fp, "; error: unresolved option bits: 0x%x\n", options); +@@ -721,6 +722,15 @@ struct test_case test_cases[] = + "nameserver 192.0.2.1\n" + "; nameserver[0]: [192.0.2.1]:53\n" + }, ++ {.name = "no-aaaa flag", ++ .conf = "options no-aaaa\n" ++ "nameserver 192.0.2.1\n", ++ .expected = "options no-aaaa\n" ++ "search example.com\n" ++ "; search[0]: example.com\n" ++ "nameserver 192.0.2.1\n" ++ "; nameserver[0]: [192.0.2.1]:53\n" ++ }, + { NULL } + }; + diff --git a/SOURCES/glibc-rh2115828-4.patch b/SOURCES/glibc-rh2115828-4.patch deleted file mode 100644 index 79a7380..0000000 --- a/SOURCES/glibc-rh2115828-4.patch +++ /dev/null @@ -1,16 +0,0 @@ -Adjust for disabled-by default rseq in downstream: tst-rseq needs to enable -rseq using the tunable, tst-rseq-disable should use the default. 
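A usage note for the RES_NOAAAA option implemented by glibc-rh2096191-2.patch above: tst-resolv-noaaaa enables it by setting _res.options directly, and ordinary applications can do the same instead of using "options no-aaaa" in /etc/resolv.conf. A minimal sketch, not part of the patch; the hostname is illustrative and a DNS-backed NSS host lookup is assumed:

    #define _GNU_SOURCE
    #include <netdb.h>
    #include <resolv.h>
    #include <stdio.h>
    #include <sys/socket.h>

    int
    main (void)
    {
      res_init ();
      _res.options |= RES_NOAAAA;   /* Same effect as "options no-aaaa".  */

      /* An A query is sent instead of AAAA, and the answer is rewritten
         into an empty (NODATA) AAAA response, per __res_handle_no_aaaa.  */
      struct hostent *he = gethostbyname2 ("www.example.net", AF_INET6);
      if (he == NULL)
        puts ("no AAAA data (suppressed by RES_NOAAAA)");
      else
        printf ("unexpected AAAA answer for %s\n", he->h_name);
      return 0;
    }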
- -diff --git a/sysdeps/unix/sysv/linux/Makefile b/sysdeps/unix/sysv/linux/Makefile -index 856a9d58cef6a879..23a577e263d6dc22 100644 ---- a/sysdeps/unix/sysv/linux/Makefile -+++ b/sysdeps/unix/sysv/linux/Makefile -@@ -233,7 +233,7 @@ $(objpfx)tst-mman-consts.out: ../sysdeps/unix/sysv/linux/tst-mman-consts.py - < /dev/null > $@ 2>&1; $(evaluate-test) - $(objpfx)tst-mman-consts.out: $(sysdeps-linux-python-deps) - --tst-rseq-disable-ENV = GLIBC_TUNABLES=glibc.pthread.rseq=0 -+tst-rseq-ENV = GLIBC_TUNABLES=glibc.pthread.rseq=1 - - endif # $(subdir) == misc - diff --git a/SOURCES/glibc-upstream-2.34-108.patch b/SOURCES/glibc-upstream-2.34-108.patch new file mode 100644 index 0000000..e4186ee --- /dev/null +++ b/SOURCES/glibc-upstream-2.34-108.patch @@ -0,0 +1,32 @@ +commit 0351c75c5f94134fcec0e778e8cf86d149f8bbfb +Author: Adhemerval Zanella +Date: Thu Feb 3 16:52:52 2022 -0300 + + linux: Fix missing __convert_scm_timestamps (BZ #28860) + + Commit 948ce73b31 made recvmsg/recvmmsg to always call + __convert_scm_timestamps for 64 bit time_t symbol, so adjust it to + always build it for __TIMESIZE != 64. + + It fixes build for architecture with 32 bit time_t support when + configured with minimum kernel of 5.1. + + (cherry-picked from 798d716df71fb23dc89d1d5dba1fc26a1b5c0024) + +diff --git a/sysdeps/unix/sysv/linux/convert_scm_timestamps.c b/sysdeps/unix/sysv/linux/convert_scm_timestamps.c +index 5d3c4199e0b32944..953ce97bd2e03849 100644 +--- a/sysdeps/unix/sysv/linux/convert_scm_timestamps.c ++++ b/sysdeps/unix/sysv/linux/convert_scm_timestamps.c +@@ -16,9 +16,10 @@ + License along with the GNU C Library; if not, see + . */ + +-#include ++#include ++#include + +-#ifndef __ASSUME_TIME64_SYSCALLS ++#if __TIMESIZE != 64 + # include + # include + # include diff --git a/SOURCES/glibc-upstream-2.34-110.patch b/SOURCES/glibc-upstream-2.34-110.patch new file mode 100644 index 0000000..cd9bc09 --- /dev/null +++ b/SOURCES/glibc-upstream-2.34-110.patch @@ -0,0 +1,192 @@ +commit 007e054d786be340699c634e3a3b30ab1fde1a7a +Author: Dmitry V. Levin +Date: Sat Feb 5 08:00:00 2022 +0000 + + linux: fix accuracy of get_nprocs and get_nprocs_conf [BZ #28865] + + get_nprocs() and get_nprocs_conf() use various methods to obtain an + accurate number of processors. Re-introduce __get_nprocs_sched() as + a source of information, and fix the order in which these methods are + used to return the most accurate information. The primary source of + information used in both functions remains unchanged. + + This also changes __get_nprocs_sched() error return value from 2 to 0, + but all its users are already prepared to handle that. 
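Before the fallback orders listed below, a user-space sketch of what the re-introduced sched_getaffinity step computes: count the bits in the caller's affinity mask. This is an illustration under the simplifying assumption that a fixed-size cpu_set_t suffices; the real __get_nprocs_sched also copes with masks covering more than 1024 CPUs:

    #define _GNU_SOURCE
    #include <sched.h>
    #include <stdio.h>

    static int
    nprocs_sched (void)
    {
      cpu_set_t set;

      /* Query the affinity mask of the calling thread.  */
      if (sched_getaffinity (0, sizeof (set), &set) != 0)
        return 0;               /* New convention: 0 means "no answer".  */
      return CPU_COUNT (&set);  /* CPUs this process may run on.  */
    }

    int
    main (void)
    {
      printf ("sched_getaffinity reports %d CPUs\n", nprocs_sched ());
      return 0;
    }

Because the mask reflects only the CPUs the process is permitted to use, it can undercount the online set, which is why this source sits behind the /sys and /proc sources in the orderings below.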
+ + Old fallback order: + get_nprocs: + /sys/devices/system/cpu/online -> /proc/stat -> 2 + get_nprocs_conf: + /sys/devices/system/cpu/ -> /proc/stat -> 2 + + New fallback order: + get_nprocs: + /sys/devices/system/cpu/online -> /proc/stat -> sched_getaffinity -> 2 + get_nprocs_conf: + /sys/devices/system/cpu/ -> /proc/stat -> sched_getaffinity -> 2 + + Fixes: 342298278e ("linux: Revert the use of sched_getaffinity on get_nproc") + Closes: BZ #28865 + Reviewed-by: Adhemerval Zanella + + (cherry picked from commit e1d32b836410767270a3adf1f82b1a47e6e4cd51) + +diff --git a/sysdeps/unix/sysv/linux/getsysstats.c b/sysdeps/unix/sysv/linux/getsysstats.c +index 7babd947aa902e77..327802b14c7326a3 100644 +--- a/sysdeps/unix/sysv/linux/getsysstats.c ++++ b/sysdeps/unix/sysv/linux/getsysstats.c +@@ -51,9 +51,8 @@ __get_nprocs_sched (void) + is an arbitrary values assuming such systems should be rare and there + is no offline cpus. */ + return max_num_cpus; +- /* Some other error. 2 is conservative (not a uniprocessor system, so +- atomics are needed). */ +- return 2; ++ /* Some other error. */ ++ return 0; + } + + static char * +@@ -109,22 +108,19 @@ next_line (int fd, char *const buffer, char **cp, char **re, + } + + static int +-get_nproc_stat (char *buffer, size_t buffer_size) ++get_nproc_stat (void) + { ++ enum { buffer_size = 1024 }; ++ char buffer[buffer_size]; + char *buffer_end = buffer + buffer_size; + char *cp = buffer_end; + char *re = buffer_end; +- +- /* Default to an SMP system in case we cannot obtain an accurate +- number. */ +- int result = 2; ++ int result = 0; + + const int flags = O_RDONLY | O_CLOEXEC; + int fd = __open_nocancel ("/proc/stat", flags); + if (fd != -1) + { +- result = 0; +- + char *l; + while ((l = next_line (fd, buffer, &cp, &re, buffer_end)) != NULL) + /* The current format of /proc/stat has all the cpu* entries +@@ -140,8 +136,8 @@ get_nproc_stat (char *buffer, size_t buffer_size) + return result; + } + +-int +-__get_nprocs (void) ++static int ++get_nprocs_cpu_online (void) + { + enum { buffer_size = 1024 }; + char buffer[buffer_size]; +@@ -180,7 +176,8 @@ __get_nprocs (void) + } + } + +- result += m - n + 1; ++ if (m >= n) ++ result += m - n + 1; + + l = endp; + if (l < re && *l == ',') +@@ -189,28 +186,18 @@ __get_nprocs (void) + while (l < re && *l != '\n'); + + __close_nocancel_nostatus (fd); +- +- if (result > 0) +- return result; + } + +- return get_nproc_stat (buffer, buffer_size); ++ return result; + } +-libc_hidden_def (__get_nprocs) +-weak_alias (__get_nprocs, get_nprocs) +- + +-/* On some architectures it is possible to distinguish between configured +- and active cpus. */ +-int +-__get_nprocs_conf (void) ++static int ++get_nprocs_cpu (void) + { +- /* Try to use the sysfs filesystem. It has actual information about +- online processors. */ ++ int count = 0; + DIR *dir = __opendir ("/sys/devices/system/cpu"); + if (dir != NULL) + { +- int count = 0; + struct dirent64 *d; + + while ((d = __readdir64 (dir)) != NULL) +@@ -225,12 +212,57 @@ __get_nprocs_conf (void) + + __closedir (dir); + +- return count; + } ++ return count; ++} + +- enum { buffer_size = 1024 }; +- char buffer[buffer_size]; +- return get_nproc_stat (buffer, buffer_size); ++static int ++get_nprocs_fallback (void) ++{ ++ int result; ++ ++ /* Try /proc/stat first. */ ++ result = get_nproc_stat (); ++ if (result != 0) ++ return result; ++ ++ /* Try sched_getaffinity. */ ++ result = __get_nprocs_sched (); ++ if (result != 0) ++ return result; ++ ++ /* We failed to obtain an accurate number. 
Be conservative: return ++ the smallest number meaning that this is not a uniprocessor system, ++ so atomics are needed. */ ++ return 2; ++} ++ ++int ++__get_nprocs (void) ++{ ++ /* Try /sys/devices/system/cpu/online first. */ ++ int result = get_nprocs_cpu_online (); ++ if (result != 0) ++ return result; ++ ++ /* Fall back to /proc/stat and sched_getaffinity. */ ++ return get_nprocs_fallback (); ++} ++libc_hidden_def (__get_nprocs) ++weak_alias (__get_nprocs, get_nprocs) ++ ++/* On some architectures it is possible to distinguish between configured ++ and active cpus. */ ++int ++__get_nprocs_conf (void) ++{ ++ /* Try /sys/devices/system/cpu/ first. */ ++ int result = get_nprocs_cpu (); ++ if (result != 0) ++ return result; ++ ++ /* Fall back to /proc/stat and sched_getaffinity. */ ++ return get_nprocs_fallback (); + } + libc_hidden_def (__get_nprocs_conf) + weak_alias (__get_nprocs_conf, get_nprocs_conf) diff --git a/SOURCES/glibc-upstream-2.34-111.patch b/SOURCES/glibc-upstream-2.34-111.patch new file mode 100644 index 0000000..cf08e56 --- /dev/null +++ b/SOURCES/glibc-upstream-2.34-111.patch @@ -0,0 +1,60 @@ +commit 04d60ce0f21ffe2a4add148cb37a1942dbad64e2 +Author: H.J. Lu +Date: Thu Feb 17 08:10:35 2022 -0800 + + string: Add a testcase for wcsncmp with SIZE_MAX [BZ #28755] + + Verify that wcsncmp (L("abc"), L("abd"), SIZE_MAX) == 0. The new test + fails without + + commit ddf0992cf57a93200e0c782e2a94d0733a5a0b87 + Author: Noah Goldstein + Date: Sun Jan 9 16:02:21 2022 -0600 + + x86: Fix __wcsncmp_avx2 in strcmp-avx2.S [BZ# 28755] + + and + + commit 7e08db3359c86c94918feb33a1182cd0ff3bb10b + Author: Noah Goldstein + Date: Sun Jan 9 16:02:28 2022 -0600 + + x86: Fix __wcsncmp_evex in strcmp-evex.S [BZ# 28755] + + This is for BZ #28755. + + Reviewed-by: Sunil K Pandey + + (cherry picked from commit aa5a720056d37cf24924c138a3dbe6dace98e97c) + +diff --git a/string/test-strncmp.c b/string/test-strncmp.c +index 10b34de8d2acb2a1..97e831d88fd24316 100644 +--- a/string/test-strncmp.c ++++ b/string/test-strncmp.c +@@ -435,6 +435,18 @@ check3 (void) + } + } + ++static void ++check4 (void) ++{ ++ const CHAR *s1 = L ("abc"); ++ CHAR *s2 = STRDUP (s1); ++ ++ FOR_EACH_IMPL (impl, 0) ++ check_result (impl, s1, s2, SIZE_MAX, 0); ++ ++ free (s2); ++} ++ + int + test_main (void) + { +@@ -445,6 +457,7 @@ test_main (void) + check1 (); + check2 (); + check3 (); ++ check4 (); + + printf ("%23s", ""); + FOR_EACH_IMPL (impl, 0) diff --git a/SOURCES/glibc-upstream-2.34-112.patch b/SOURCES/glibc-upstream-2.34-112.patch new file mode 100644 index 0000000..d6a677a --- /dev/null +++ b/SOURCES/glibc-upstream-2.34-112.patch @@ -0,0 +1,120 @@ +commit 38e0d2479413ccdbc02b4c9e9e246eca31e956c9 +Author: Noah Goldstein +Date: Tue Feb 15 08:18:15 2022 -0600 + + x86: Fallback {str|wcs}cmp RTM in the ncmp overflow case [BZ #28896] + + In the overflow fallback strncmp-avx2-rtm and wcsncmp-avx2-rtm would + call strcmp-avx2 and wcscmp-avx2 respectively. This would have + not checks around vzeroupper and would trigger spurious + aborts. This commit fixes that. + + test-strcmp, test-strncmp, test-wcscmp, and test-wcsncmp all pass on + AVX2 machines with and without RTM. + + Co-authored-by: H.J. 
Lu + + (cherry picked from commit c6272098323153db373f2986c67786ea8c85f1cf) + +diff --git a/sysdeps/x86/Makefile b/sysdeps/x86/Makefile +index 36ca1a7126047b86..af934d6ccf1fa337 100644 +--- a/sysdeps/x86/Makefile ++++ b/sysdeps/x86/Makefile +@@ -105,7 +105,7 @@ CFLAGS-tst-memset-rtm.c += -mrtm + CFLAGS-tst-strchr-rtm.c += -mrtm + CFLAGS-tst-strcpy-rtm.c += -mrtm + CFLAGS-tst-strlen-rtm.c += -mrtm +-CFLAGS-tst-strncmp-rtm.c += -mrtm ++CFLAGS-tst-strncmp-rtm.c += -mrtm -Wno-error + CFLAGS-tst-strrchr-rtm.c += -mrtm + endif + +diff --git a/sysdeps/x86/tst-strncmp-rtm.c b/sysdeps/x86/tst-strncmp-rtm.c +index 236ad951b5b59cd1..4d0004b58aae428d 100644 +--- a/sysdeps/x86/tst-strncmp-rtm.c ++++ b/sysdeps/x86/tst-strncmp-rtm.c +@@ -16,6 +16,7 @@ + License along with the GNU C Library; if not, see + . */ + ++#include + #include + + #define LOOP 3000 +@@ -45,8 +46,22 @@ function (void) + return 1; + } + ++__attribute__ ((noinline, noclone)) ++static int ++function_overflow (void) ++{ ++ if (strncmp (string1, string2, SIZE_MAX) == 0) ++ return 0; ++ else ++ return 1; ++} ++ + static int + do_test (void) + { +- return do_test_1 ("strncmp", LOOP, prepare, function); ++ int status = do_test_1 ("strncmp", LOOP, prepare, function); ++ if (status != EXIT_SUCCESS) ++ return status; ++ status = do_test_1 ("strncmp", LOOP, prepare, function_overflow); ++ return status; + } +diff --git a/sysdeps/x86_64/multiarch/strcmp-avx2.S b/sysdeps/x86_64/multiarch/strcmp-avx2.S +index 3dfcb1bf803cf9ec..fa70c994fc25dfd8 100644 +--- a/sysdeps/x86_64/multiarch/strcmp-avx2.S ++++ b/sysdeps/x86_64/multiarch/strcmp-avx2.S +@@ -95,7 +95,7 @@ ENTRY (STRCMP) + length to bound a valid memory region. In these cases just use + 'wcscmp'. */ + shrq $56, %rcx +- jnz __wcscmp_avx2 ++ jnz OVERFLOW_STRCMP + # endif + /* Convert units: from wide to byte char. 
*/ + shl $2, %RDX_LP +diff --git a/sysdeps/x86_64/multiarch/strncmp-avx2-rtm.S b/sysdeps/x86_64/multiarch/strncmp-avx2-rtm.S +index 37d1224bb9b7056b..68bad365ba728eec 100644 +--- a/sysdeps/x86_64/multiarch/strncmp-avx2-rtm.S ++++ b/sysdeps/x86_64/multiarch/strncmp-avx2-rtm.S +@@ -1,3 +1,4 @@ + #define STRCMP __strncmp_avx2_rtm + #define USE_AS_STRNCMP 1 ++#define OVERFLOW_STRCMP __strcmp_avx2_rtm + #include "strcmp-avx2-rtm.S" +diff --git a/sysdeps/x86_64/multiarch/strncmp-avx2.S b/sysdeps/x86_64/multiarch/strncmp-avx2.S +index 1678bcc235a4bc6a..f138e9f1fdcf277c 100644 +--- a/sysdeps/x86_64/multiarch/strncmp-avx2.S ++++ b/sysdeps/x86_64/multiarch/strncmp-avx2.S +@@ -1,3 +1,4 @@ + #define STRCMP __strncmp_avx2 + #define USE_AS_STRNCMP 1 ++#define OVERFLOW_STRCMP __strcmp_avx2 + #include "strcmp-avx2.S" +diff --git a/sysdeps/x86_64/multiarch/wcsncmp-avx2-rtm.S b/sysdeps/x86_64/multiarch/wcsncmp-avx2-rtm.S +index 4e88c70cc696b82d..f467582cbedd4535 100644 +--- a/sysdeps/x86_64/multiarch/wcsncmp-avx2-rtm.S ++++ b/sysdeps/x86_64/multiarch/wcsncmp-avx2-rtm.S +@@ -1,5 +1,5 @@ + #define STRCMP __wcsncmp_avx2_rtm + #define USE_AS_STRNCMP 1 + #define USE_AS_WCSCMP 1 +- ++#define OVERFLOW_STRCMP __wcscmp_avx2_rtm + #include "strcmp-avx2-rtm.S" +diff --git a/sysdeps/x86_64/multiarch/wcsncmp-avx2.S b/sysdeps/x86_64/multiarch/wcsncmp-avx2.S +index 4fa1de4d3f1f97ff..e9ede522b8bde27d 100644 +--- a/sysdeps/x86_64/multiarch/wcsncmp-avx2.S ++++ b/sysdeps/x86_64/multiarch/wcsncmp-avx2.S +@@ -1,5 +1,5 @@ + #define STRCMP __wcsncmp_avx2 + #define USE_AS_STRNCMP 1 + #define USE_AS_WCSCMP 1 +- ++#define OVERFLOW_STRCMP __wcscmp_avx2 + #include "strcmp-avx2.S" diff --git a/SOURCES/glibc-upstream-2.34-113.patch b/SOURCES/glibc-upstream-2.34-113.patch new file mode 100644 index 0000000..e83d23c --- /dev/null +++ b/SOURCES/glibc-upstream-2.34-113.patch @@ -0,0 +1,139 @@ +commit d093b677c36ef4b360bf30483b68b95d9f0ad1d2 +Author: Noah Goldstein +Date: Fri Feb 18 14:19:15 2022 -0600 + + x86: Test wcscmp RTM in the wcsncmp overflow case [BZ #28896] + + In the overflow fallback strncmp-avx2-rtm and wcsncmp-avx2-rtm would + call strcmp-avx2 and wcscmp-avx2 respectively. This would have + not checks around vzeroupper and would trigger spurious + aborts. This commit fixes that. + + test-strcmp, test-strncmp, test-wcscmp, and test-wcsncmp all pass on + AVX2 machines with and without RTM. + Reviewed-by: H.J. 
Lu + + (cherry picked from commit 7835d611af0854e69a0c71e3806f8fe379282d6f) + +diff --git a/sysdeps/x86/Makefile b/sysdeps/x86/Makefile +index af934d6ccf1fa337..cd94e683afd5b4a4 100644 +--- a/sysdeps/x86/Makefile ++++ b/sysdeps/x86/Makefile +@@ -95,7 +95,9 @@ tests += \ + tst-strcpy-rtm \ + tst-strlen-rtm \ + tst-strncmp-rtm \ +- tst-strrchr-rtm ++ tst-strrchr-rtm \ ++ tst-wcsncmp-rtm \ ++# tests + + CFLAGS-tst-memchr-rtm.c += -mrtm + CFLAGS-tst-memcmp-rtm.c += -mrtm +@@ -107,6 +109,7 @@ CFLAGS-tst-strcpy-rtm.c += -mrtm + CFLAGS-tst-strlen-rtm.c += -mrtm + CFLAGS-tst-strncmp-rtm.c += -mrtm -Wno-error + CFLAGS-tst-strrchr-rtm.c += -mrtm ++CFLAGS-tst-wcsncmp-rtm.c += -mrtm -Wno-error + endif + + ifneq ($(enable-cet),no) +diff --git a/sysdeps/x86/tst-strncmp-rtm.c b/sysdeps/x86/tst-strncmp-rtm.c +index 4d0004b58aae428d..4e9f094f39c72f67 100644 +--- a/sysdeps/x86/tst-strncmp-rtm.c ++++ b/sysdeps/x86/tst-strncmp-rtm.c +@@ -19,18 +19,32 @@ + #include + #include + ++#ifdef WIDE ++# define CHAR wchar_t ++# define MEMSET wmemset ++# define STRNCMP wcsncmp ++# define TEST_NAME wcsncmp ++#else /* !WIDE */ ++# define CHAR char ++# define MEMSET memset ++# define STRNCMP strncmp ++# define TEST_NAME strncmp ++#endif /* !WIDE */ ++ ++ ++ + #define LOOP 3000 + #define STRING_SIZE 1024 +-char string1[STRING_SIZE]; +-char string2[STRING_SIZE]; ++CHAR string1[STRING_SIZE]; ++CHAR string2[STRING_SIZE]; + + __attribute__ ((noinline, noclone)) + static int + prepare (void) + { +- memset (string1, 'a', STRING_SIZE - 1); +- memset (string2, 'a', STRING_SIZE - 1); +- if (strncmp (string1, string2, STRING_SIZE) == 0) ++ MEMSET (string1, 'a', STRING_SIZE - 1); ++ MEMSET (string2, 'a', STRING_SIZE - 1); ++ if (STRNCMP (string1, string2, STRING_SIZE) == 0) + return EXIT_SUCCESS; + else + return EXIT_FAILURE; +@@ -40,7 +54,7 @@ __attribute__ ((noinline, noclone)) + static int + function (void) + { +- if (strncmp (string1, string2, STRING_SIZE) == 0) ++ if (STRNCMP (string1, string2, STRING_SIZE) == 0) + return 0; + else + return 1; +@@ -50,7 +64,7 @@ __attribute__ ((noinline, noclone)) + static int + function_overflow (void) + { +- if (strncmp (string1, string2, SIZE_MAX) == 0) ++ if (STRNCMP (string1, string2, SIZE_MAX) == 0) + return 0; + else + return 1; +@@ -59,9 +73,9 @@ function_overflow (void) + static int + do_test (void) + { +- int status = do_test_1 ("strncmp", LOOP, prepare, function); ++ int status = do_test_1 (TEST_NAME, LOOP, prepare, function); + if (status != EXIT_SUCCESS) + return status; +- status = do_test_1 ("strncmp", LOOP, prepare, function_overflow); ++ status = do_test_1 (TEST_NAME, LOOP, prepare, function_overflow); + return status; + } +diff --git a/sysdeps/x86/tst-wcsncmp-rtm.c b/sysdeps/x86/tst-wcsncmp-rtm.c +new file mode 100644 +index 0000000000000000..bad3b863782c5e56 +--- /dev/null ++++ b/sysdeps/x86/tst-wcsncmp-rtm.c +@@ -0,0 +1,21 @@ ++/* Test case for wcsncmp inside a transactionally executing RTM region. ++ Copyright (C) 2022 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#define WIDE 1 ++#include ++#include "tst-strncmp-rtm.c" diff --git a/SOURCES/glibc-upstream-2.34-114.patch b/SOURCES/glibc-upstream-2.34-114.patch new file mode 100644 index 0000000..863b88c --- /dev/null +++ b/SOURCES/glibc-upstream-2.34-114.patch @@ -0,0 +1,32 @@ +commit 15b00d2af0e56dcc8c244a36d6872d301b0c7185 +Author: Noah Goldstein +Date: Fri Feb 18 17:00:25 2022 -0600 + + x86: Fix TEST_NAME to make it a string in tst-strncmp-rtm.c + + Previously TEST_NAME was passing a function pointer. This didn't fail + because of the -Wno-error flag (to allow for overflow sizes passed + to strncmp/wcsncmp) + + Reviewed-by: H.J. Lu + (cherry picked from commit b98d0bbf747f39770e0caba7e984ce9f8f900330) + +diff --git a/sysdeps/x86/tst-strncmp-rtm.c b/sysdeps/x86/tst-strncmp-rtm.c +index 4e9f094f39c72f67..aef9866cf2fbe774 100644 +--- a/sysdeps/x86/tst-strncmp-rtm.c ++++ b/sysdeps/x86/tst-strncmp-rtm.c +@@ -23,12 +23,12 @@ + # define CHAR wchar_t + # define MEMSET wmemset + # define STRNCMP wcsncmp +-# define TEST_NAME wcsncmp ++# define TEST_NAME "wcsncmp" + #else /* !WIDE */ + # define CHAR char + # define MEMSET memset + # define STRNCMP strncmp +-# define TEST_NAME strncmp ++# define TEST_NAME "strncmp" + #endif /* !WIDE */ + + diff --git a/SOURCES/glibc-upstream-2.34-117.patch b/SOURCES/glibc-upstream-2.34-117.patch new file mode 100644 index 0000000..62b8f3b --- /dev/null +++ b/SOURCES/glibc-upstream-2.34-117.patch @@ -0,0 +1,104 @@ +commit 3be79b72d556e3ac37075ad6b99eb5eac18e1402 +Author: John David Anglin +Date: Sun Mar 6 15:56:57 2022 +0000 + + Fix elf/tst-audit2 on hppa + + The test elf/tst-audit2 fails on hppa with a segmentation fault in the + long branch stub used to call malloc from calloc. This occurs because + the test is not a PIC executable and calloc is called from the dynamic + linker before the dp register is initialized in _dl_start_user. + + The fix is to move the dp register initialization into + elf_machine_runtime_setup. Since the address of $global$ can't be + loaded directly, we continue to use the DT_PLTGOT value from the + the main_map to initialize dp. Since l_main_map is not available + in v2.34 and earlier, we use a new function, elf_machine_main_map, + to find the main map. + +diff --git a/sysdeps/hppa/dl-machine.h b/sysdeps/hppa/dl-machine.h +index f048fd20728ccde6..24f0f47d8f1e25cd 100644 +--- a/sysdeps/hppa/dl-machine.h ++++ b/sysdeps/hppa/dl-machine.h +@@ -27,6 +27,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -159,6 +160,24 @@ elf_machine_plt_value (struct link_map *map, const Elf32_Rela *reloc, + return (struct fdesc) { value.ip + reloc->r_addend, value.gp }; + } + ++static inline struct link_map * ++elf_machine_main_map (void) ++{ ++ struct link_map *main_map; ++ ++#if defined SHARED && IS_IN (rtld) ++ asm ( ++" bl 1f,%0\n" ++" addil L'_rtld_local - ($PIC_pcrel$0 - 1),%0\n" ++"1: ldw R'_rtld_local - ($PIC_pcrel$0 - 5)(%%r1),%0\n" ++ : "=r" (main_map) : : "r1"); ++#else ++ main_map = NULL; ++#endif ++ ++ return main_map; ++} ++ + /* Set up the loaded object described by L so its unrelocated PLT + entries will jump to the on-demand fixup code in dl-runtime.c. 
*/ + +@@ -174,6 +193,15 @@ elf_machine_runtime_setup (struct link_map *l, int lazy, int profile) + Elf32_Addr i[2]; + } sig = {{0x00,0xc0,0xff,0xee, 0xde,0xad,0xbe,0xef}}; + ++ /* Initialize dp register for main executable. */ ++ if (l == elf_machine_main_map ()) ++ { ++ register Elf32_Addr dp asm ("%r27"); ++ ++ dp = D_PTR (l, l_info[DT_PLTGOT]); ++ asm volatile ("" : : "r" (dp)); ++ } ++ + /* If we don't have a PLT we can just skip all this... */ + if (__builtin_expect (l->l_info[DT_JMPREL] == NULL,0)) + return lazy; +@@ -336,16 +364,6 @@ elf_machine_runtime_setup (struct link_map *l, int lazy, int profile) + its return value is the user program's entry point. */ + + #define RTLD_START \ +-/* Set up dp for any non-PIC lib constructors that may be called. */ \ +-static struct link_map * __attribute__((used)) \ +-set_dp (struct link_map *map) \ +-{ \ +- register Elf32_Addr dp asm ("%r27"); \ +- dp = D_PTR (map, l_info[DT_PLTGOT]); \ +- asm volatile ("" : : "r" (dp)); \ +- return map; \ +-} \ +- \ + asm ( \ + " .text\n" \ + " .globl _start\n" \ +@@ -445,14 +463,11 @@ asm ( \ + " stw %r24,-44(%sp)\n" \ + \ + ".Lnofix:\n" \ ++ /* Call _dl_init(main_map, argc, argv, envp). */ \ + " addil LT'_rtld_local,%r19\n" \ + " ldw RT'_rtld_local(%r1),%r26\n" \ +-" bl set_dp, %r2\n" \ + " ldw 0(%r26),%r26\n" \ + \ +- /* Call _dl_init(_dl_loaded, argc, argv, envp). */ \ +-" copy %r28,%r26\n" \ +- \ + /* envp = argv + argc + 1 */ \ + " sh2add %r25,%r24,%r23\n" \ + " bl _dl_init,%r2\n" \ diff --git a/SOURCES/glibc-upstream-2.34-118.patch b/SOURCES/glibc-upstream-2.34-118.patch new file mode 100644 index 0000000..b2b028e --- /dev/null +++ b/SOURCES/glibc-upstream-2.34-118.patch @@ -0,0 +1,146 @@ +commit c6f9085ee4e913a0b8260340ac7b75c426b780ce +Author: John David Anglin +Date: Fri Feb 18 20:38:25 2022 +0000 + + hppa: Fix swapcontext + + This change fixes the failure of stdlib/tst-setcontext2 and + stdlib/tst-setcontext7 on hppa. The implementation of swapcontext + in C is broken. C saves the return pointer (rp) and any non + call-clobbered registers (in this case r3, r4 and r5) on the + stack. However, the setcontext call in swapcontext pops the + stack and subsequent calls clobber the saved registers. When + the context in oucp is restored, both tests fault. + + Here we rewrite swapcontext in assembly code to avoid using + the stack for register values that need to be used after + restoration. The getcontext and setcontext routines are + revised to save and restore register ret1 for normal returns. + We copy the oucp pointer to ret1. This allows access to + the old context after calling getcontext and setcontext. + + (cherry picked from commit 71b108d7eb33b2bf3e61d5e92d2a47f74c1f7d96) + +diff --git a/sysdeps/unix/sysv/linux/hppa/getcontext.S b/sysdeps/unix/sysv/linux/hppa/getcontext.S +index 1405b42819c38993..c8b690aab8ecc47c 100644 +--- a/sysdeps/unix/sysv/linux/hppa/getcontext.S ++++ b/sysdeps/unix/sysv/linux/hppa/getcontext.S +@@ -138,6 +138,8 @@ ENTRY(__getcontext) + stw %r19, -32(%sp) + .cfi_offset 19, 32 + #endif ++ stw %ret1, -60(%sp) ++ .cfi_offset 29, 4 + + /* Set up the trampoline registers. 
+ r20, r23, r24, r25, r26 and r2 are clobbered +@@ -168,6 +170,7 @@ ENTRY(__getcontext) + #ifdef PIC + ldw -32(%sp), %r19 + #endif ++ ldw -60(%sp), %ret1 + bv %r0(%r2) + ldwm -64(%sp), %r4 + END(__getcontext) +diff --git a/sysdeps/unix/sysv/linux/hppa/setcontext.S b/sysdeps/unix/sysv/linux/hppa/setcontext.S +index 8fc5f5e56cb31f51..e1ae3aefcaac198d 100644 +--- a/sysdeps/unix/sysv/linux/hppa/setcontext.S ++++ b/sysdeps/unix/sysv/linux/hppa/setcontext.S +@@ -34,6 +34,8 @@ ENTRY(__setcontext) + stw %r19, -32(%sp) + .cfi_offset 19, 32 + #endif ++ stw %ret1, -60(%sp) ++ .cfi_offset 29, 4 + + /* Save ucp. */ + copy %r26, %r3 +@@ -155,6 +157,7 @@ ENTRY(__setcontext) + #ifdef PIC + ldw -32(%r30), %r19 + #endif ++ ldw -60(%r30), %ret1 + bv %r0(%r2) + ldwm -64(%r30), %r3 + L(pseudo_end): +diff --git a/sysdeps/unix/sysv/linux/hppa/swapcontext.c b/sysdeps/unix/sysv/linux/hppa/swapcontext.c +index f9a8207543c164cb..562f00ff0546177d 100644 +--- a/sysdeps/unix/sysv/linux/hppa/swapcontext.c ++++ b/sysdeps/unix/sysv/linux/hppa/swapcontext.c +@@ -18,6 +18,7 @@ + . */ + + #include ++#include "ucontext_i.h" + + extern int __getcontext (ucontext_t *ucp); + extern int __setcontext (const ucontext_t *ucp); +@@ -25,17 +26,61 @@ extern int __setcontext (const ucontext_t *ucp); + int + __swapcontext (ucontext_t *oucp, const ucontext_t *ucp) + { ++ /* Save ucp in stack argument slot. */ ++ asm ("stw %r25,-40(%sp)"); ++ asm (".cfi_offset 25, -40"); ++ ++ /* Save rp for debugger. */ ++ asm ("stw %rp,-20(%sp)"); ++ asm (".cfi_offset 2, -20"); ++ ++ /* Copy rp to ret0 (r28). */ ++ asm ("copy %rp,%ret0"); ++ ++ /* Create a frame. */ ++ asm ("ldo 64(%sp),%sp"); ++ asm (".cfi_def_cfa_offset -64"); ++ + /* Save the current machine context to oucp. */ +- __getcontext (oucp); ++ asm ("bl __getcontext,%rp"); ++ ++ /* Copy oucp to register ret1 (r29). __getcontext saves and restores it ++ on a normal return. It is restored from oR29 on reactivation. */ ++ asm ("copy %r26,%ret1"); ++ ++ /* Pop frame. */ ++ asm ("ldo -64(%sp),%sp"); ++ asm (".cfi_def_cfa_offset 0"); ++ ++ /* Load return pointer from oR28. */ ++ asm ("ldw %0(%%ret1),%%rp" : : "i" (oR28)); ++ ++ /* Return if error. */ ++ asm ("or,= %r0,%ret0,%r0"); ++ asm ("bv,n %r0(%rp)"); ++ ++ /* Load sc_sar flag. */ ++ asm ("ldw %0(%%ret1),%%r20" : : "i" (oSAR)); ++ ++ /* Return if oucp context has been reactivated. */ ++ asm ("or,= %r0,%r20,%r0"); ++ asm ("bv,n %r0(%rp)"); ++ ++ /* Mark sc_sar flag. */ ++ asm ("1: ldi 1,%r20"); ++ asm ("stw %%r20,%0(%%ret1)" : : "i" (oSAR)); ++ ++ /* Activate the machine context in ucp. */ ++ asm ("bl __setcontext,%rp"); ++ asm ("ldw -40(%sp),%r26"); + +- /* mark sc_sar flag to skip the setcontext call on reactivation. */ +- if (oucp->uc_mcontext.sc_sar == 0) { +- oucp->uc_mcontext.sc_sar++; ++ /* Load return pointer. */ ++ asm ("ldw %0(%%ret1),%%rp" : : "i" (oR28)); + +- /* Restore the machine context in ucp. */ +- __setcontext (ucp); +- } ++ /* A successful call to setcontext does not return. */ ++ asm ("bv,n %r0(%rp)"); + ++ /* Make gcc happy. */ + return 0; + } + diff --git a/SOURCES/glibc-upstream-2.34-119.patch b/SOURCES/glibc-upstream-2.34-119.patch new file mode 100644 index 0000000..cc25de7 --- /dev/null +++ b/SOURCES/glibc-upstream-2.34-119.patch @@ -0,0 +1,177 @@ +commit f610d2935f041c5f41ddcb96924ea42ca2fb5ea5 +Author: John David Anglin +Date: Tue Feb 22 17:28:46 2022 +0000 + + hppa: Revise gettext trampoline design + + The current getcontext return trampoline is overly complex and it + unnecessarily clobbers several registers. 
By saving the context + pointer (r26) in the context, __getcontext_ret can restore any + registers not restored by setcontext. This allows getcontext to + save and restore the entire register context present when getcontext + is entered. We use the unused oR0 context slot for the return + from __getcontext_ret. + + While this is not directly useful in C, it can be exploited in + assembly code. Registers r20, r23, r24 and r25 are not clobbered + in the call path to getcontext. This allows a small simplification + of swapcontext. + + It also allows saving and restoring the 6-bit SAR register in the + LSB of the oSAR context slot. The getcontext flag value can be + stored in the MSB of the oSAR slot. + + (cherry picked from commit 9e7e5fda38471e00d1190479ea91d7b08ae3e304) + +diff --git a/sysdeps/unix/sysv/linux/hppa/getcontext.S b/sysdeps/unix/sysv/linux/hppa/getcontext.S +index c8b690aab8ecc47c..4f2e2587d60effc8 100644 +--- a/sysdeps/unix/sysv/linux/hppa/getcontext.S ++++ b/sysdeps/unix/sysv/linux/hppa/getcontext.S +@@ -22,22 +22,28 @@ + #include "ucontext_i.h" + + +- /* Trampoline function. Non-standard calling ABI. */ ++ /* Trampoline function. Non-standard calling ABI. */ + /* Can not use ENTRY(__getcontext_ret) here. */ + .type __getcontext_ret, @function + .hidden __getcontext_ret + __getcontext_ret: + .proc + .callinfo FRAME=0,NO_CALLS +- /* r26-r23 contain original r3-r6, but because setcontext +- does not reload r3-r6 (it's using them as temporaries) +- we must save them elsewhere and swap them back in. */ +- copy %r23, %r3 +- copy %r24, %r4 +- copy %r25, %r5 +- copy %r26, %r6 +- /* r20 contains original return pointer. */ +- bv 0(%r20) ++ /* Because setcontext does not reload r3-r6 (it's using them ++ as temporaries), we must load them ourself. */ ++ ldw oR3(%r26), %r3 ++ ldw oR4(%r26), %r4 ++ ldw oR5(%r26), %r5 ++ ldw oR6(%r26), %r6 ++ ++ /* Also reload registers clobbered by $$dyncall. */ ++ ldw oR21(%r26), %r21 ++ ldw oR22(%r26), %r22 ++ ldw oR31(%r26), %r31 ++ ++ /* oR0 contains original return pointer. */ ++ ldw oR0(%r26), %rp ++ bv 0(%rp) + copy %r0, %ret0 + .procend + .size __getcontext_ret, .-__getcontext_ret +@@ -65,13 +71,13 @@ ENTRY(__getcontext) + stw %r17, oR17(%r26) + stw %r18, oR18(%r26) + stw %r19, oR19(%r26) +- /* stw %r20, oR20(%r26) - used for trampoline. */ ++ stw %r20, oR20(%r26) + stw %r21, oR21(%r26) + stw %r22, oR22(%r26) +- /* stw %r23, oR23(%r26) - used for trampoline. */ +- /* stw %r24, oR24(%r26) - used for trampoline. */ +- /* stw %r25, oR25(%r26) - used for trampoline. */ +- /* stw %r26, oR26(%r26) - used for trampoline. */ ++ stw %r23, oR23(%r26) ++ stw %r24, oR24(%r26) ++ stw %r25, oR25(%r26) ++ stw %r26, oR26(%r26) + stw %r27, oR27(%r26) + stw %r28, oR28(%r26) + stw %r29, oR29(%r26) +@@ -90,7 +96,10 @@ ENTRY(__getcontext) + stw %r0, oIASQ1(%r26) + stw %r0, oIAOQ0(%r26) + stw %r0, oIAOQ1(%r26) +- stw %r0, oSAR(%r26) /* used as flag in swapcontext(). */ ++ ++ /* Save SAR register. */ ++ mfctl %sar, %r1 ++ stw %r1, oSAR(%r26) /* MSB used as flag in swapcontext(). */ + + + /* Store floating-point regs. */ +@@ -142,13 +151,8 @@ ENTRY(__getcontext) + .cfi_offset 29, 4 + + /* Set up the trampoline registers. +- r20, r23, r24, r25, r26 and r2 are clobbered +- by call to getcontext() anyway. Reuse them. */ +- stw %r2, oR20(%r26) +- stw %r3, oR23(%r26) +- stw %r4, oR24(%r26) +- stw %r5, oR25(%r26) +- stw %r6, oR26(%r26) ++ Use oR0 context slot to save return value. 
*/ ++ stw %r2, oR0(%r26) + #ifdef PIC + addil LT%__getcontext_ret, %r19 + ldw RT%__getcontext_ret(%r1), %r1 +diff --git a/sysdeps/unix/sysv/linux/hppa/setcontext.S b/sysdeps/unix/sysv/linux/hppa/setcontext.S +index e1ae3aefcaac198d..616405b80c61d531 100644 +--- a/sysdeps/unix/sysv/linux/hppa/setcontext.S ++++ b/sysdeps/unix/sysv/linux/hppa/setcontext.S +@@ -76,7 +76,7 @@ ENTRY(__setcontext) + ldw oR18(%r3), %r18 + ldw oR19(%r3), %r19 + ldw oR20(%r3), %r20 +- ldw oR21(%r3), %r21 ++ ldw oR21(%r3), %r21 /* maybe clobbered by dyncall */ + /* ldw oR22(%r3), %r22 - dyncall arg. */ + ldw oR23(%r3), %r23 + ldw oR24(%r3), %r24 +@@ -88,6 +88,10 @@ ENTRY(__setcontext) + ldw oR30(%r3), %sp + /* ldw oR31(%r3), %r31 - dyncall scratch register */ + ++ /* Restore SAR register. */ ++ ldw oSAR(%r3), %r22 ++ mtsar %r22 ++ + /* Restore floating-point registers. */ + ldo oFPREGS31(%r3), %r22 + fldds 0(%r22), %fr31 +diff --git a/sysdeps/unix/sysv/linux/hppa/swapcontext.c b/sysdeps/unix/sysv/linux/hppa/swapcontext.c +index 562f00ff0546177d..1664f68c7b9982e8 100644 +--- a/sysdeps/unix/sysv/linux/hppa/swapcontext.c ++++ b/sysdeps/unix/sysv/linux/hppa/swapcontext.c +@@ -26,10 +26,6 @@ extern int __setcontext (const ucontext_t *ucp); + int + __swapcontext (ucontext_t *oucp, const ucontext_t *ucp) + { +- /* Save ucp in stack argument slot. */ +- asm ("stw %r25,-40(%sp)"); +- asm (".cfi_offset 25, -40"); +- + /* Save rp for debugger. */ + asm ("stw %rp,-20(%sp)"); + asm (".cfi_offset 2, -20"); +@@ -60,7 +56,7 @@ __swapcontext (ucontext_t *oucp, const ucontext_t *ucp) + asm ("bv,n %r0(%rp)"); + + /* Load sc_sar flag. */ +- asm ("ldw %0(%%ret1),%%r20" : : "i" (oSAR)); ++ asm ("ldb %0(%%ret1),%%r20" : : "i" (oSAR)); + + /* Return if oucp context has been reactivated. */ + asm ("or,= %r0,%r20,%r0"); +@@ -68,11 +64,11 @@ __swapcontext (ucontext_t *oucp, const ucontext_t *ucp) + + /* Mark sc_sar flag. */ + asm ("1: ldi 1,%r20"); +- asm ("stw %%r20,%0(%%ret1)" : : "i" (oSAR)); ++ asm ("stb %%r20,%0(%%ret1)" : : "i" (oSAR)); + + /* Activate the machine context in ucp. */ + asm ("bl __setcontext,%rp"); +- asm ("ldw -40(%sp),%r26"); ++ asm ("ldw %0(%%ret1),%%r26" : : "i" (oR25)); + + /* Load return pointer. */ + asm ("ldw %0(%%ret1),%%rp" : : "i" (oR28)); diff --git a/SOURCES/glibc-upstream-2.34-120.patch b/SOURCES/glibc-upstream-2.34-120.patch new file mode 100644 index 0000000..2fe0c55 --- /dev/null +++ b/SOURCES/glibc-upstream-2.34-120.patch @@ -0,0 +1,27 @@ +commit 40fc6a74ee3dd600c84d311d91cbb16962f11a71 +Author: John David Anglin +Date: Mon Feb 28 15:47:38 2022 +0000 + + nptl: Fix cleanups for stack grows up [BZ# 28899] + + _STACK_GROWS_DOWN is defined to 0 when the stack grows up. The + code in unwind.c used `#ifdef _STACK_GROWS_DOWN' to selct the + stack grows down define for FRAME_LEFT. As a result, the + _STACK_GROWS_DOWN define was always selected and cleanups were + incorrectly sequenced when the stack grows up. 
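The bug class is easy to reproduce in isolation. In this sketch the macro name is invented, but the mechanics are identical: a macro defined to 0 still satisfies #ifdef, so only #if selects on its value:

    #include <stdio.h>

    #define MY_STACK_GROWS_DOWN 0   /* Defined, but false.  */

    int
    main (void)
    {
    #ifdef MY_STACK_GROWS_DOWN
      puts ("#ifdef branch taken even though the value is 0 (the old bug)");
    #endif
    #if MY_STACK_GROWS_DOWN
      puts ("#if: down-growing variant");
    #else
      puts ("#if: up-growing variant (what the fix selects)");
    #endif
      return 0;
    }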
+ + (cherry picked from commit 2bbc694df279020a6620096d31c1e05c93966f9b) + +diff --git a/nptl/unwind.c b/nptl/unwind.c +index f50997f728ccde0d..404fab46d00e9f10 100644 +--- a/nptl/unwind.c ++++ b/nptl/unwind.c +@@ -27,7 +27,7 @@ + #include + #include + +-#ifdef _STACK_GROWS_DOWN ++#if _STACK_GROWS_DOWN + # define FRAME_LEFT(frame, other, adj) \ + ((uintptr_t) frame - adj >= (uintptr_t) other - adj) + #elif _STACK_GROWS_UP diff --git a/SOURCES/glibc-upstream-2.34-121.patch b/SOURCES/glibc-upstream-2.34-121.patch new file mode 100644 index 0000000..3f74efd --- /dev/null +++ b/SOURCES/glibc-upstream-2.34-121.patch @@ -0,0 +1,122 @@ +commit 6c9c2307657529e52c5fa7037618835f2a50b916 +Author: John David Anglin +Date: Sun Mar 6 16:04:32 2022 +0000 + + hppa: Fix warnings from _dl_lookup_address + + This change fixes two warnings from _dl_lookup_address. + + The first warning comes from dropping the volatile keyword from + desc in the call to _dl_read_access_allowed. We now have a full + atomic barrier between loading desc[0] and the access check, so + desc no longer needs to be declared as volatile. + + The second warning comes from the implicit declaration of + _dl_fix_reloc_arg. This is fixed by including dl-runtime.h and + declaring _dl_fix_reloc_arg in dl-runtime.h. + +diff --git a/sysdeps/hppa/dl-fptr.c b/sysdeps/hppa/dl-fptr.c +index 62ef68b62bd601f4..cd4f77c0ecfd376f 100644 +--- a/sysdeps/hppa/dl-fptr.c ++++ b/sysdeps/hppa/dl-fptr.c +@@ -26,6 +26,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -351,21 +352,20 @@ _dl_lookup_address (const void *address) + { + ElfW(Addr) addr = (ElfW(Addr)) address; + ElfW(Word) reloc_arg; +- volatile unsigned int *desc; +- unsigned int *gptr; ++ unsigned int *desc, *gptr; + + /* Return ADDR if the least-significant two bits of ADDR are not consistent + with ADDR being a linker defined function pointer. The normal value for + a code address in a backtrace is 3. */ +- if (((unsigned int) addr & 3) != 2) ++ if (((uintptr_t) addr & 3) != 2) + return addr; + + /* Handle special case where ADDR points to page 0. */ +- if ((unsigned int) addr < 4096) ++ if ((uintptr_t) addr < 4096) + return addr; + + /* Clear least-significant two bits from descriptor address. */ +- desc = (unsigned int *) ((unsigned int) addr & ~3); ++ desc = (unsigned int *) ((uintptr_t) addr & ~3); + if (!_dl_read_access_allowed (desc)) + return addr; + +@@ -376,7 +376,7 @@ _dl_lookup_address (const void *address) + /* Then load first word of candidate descriptor. It should be a pointer + with word alignment and point to memory that can be read. */ + gptr = (unsigned int *) desc[0]; +- if (((unsigned int) gptr & 3) != 0 ++ if (((uintptr_t) gptr & 3) != 0 + || !_dl_read_access_allowed (gptr)) + return addr; + +@@ -400,10 +400,11 @@ _dl_lookup_address (const void *address) + + /* If gp has been resolved, we need to hunt for relocation offset. 
*/ + if (!(reloc_arg & PA_GP_RELOC)) +- reloc_arg = _dl_fix_reloc_arg (addr, l); ++ reloc_arg = _dl_fix_reloc_arg ((struct fdesc *) addr, l); + + _dl_fixup (l, reloc_arg); + } + + return (ElfW(Addr)) desc[0]; + } ++rtld_hidden_def (_dl_lookup_address) +diff --git a/sysdeps/hppa/dl-lookupcfg.h b/sysdeps/hppa/dl-lookupcfg.h +index a9a927f26c6fec09..2f6991aa16e87a00 100644 +--- a/sysdeps/hppa/dl-lookupcfg.h ++++ b/sysdeps/hppa/dl-lookupcfg.h +@@ -30,6 +30,7 @@ rtld_hidden_proto (_dl_symbol_address) + #define DL_SYMBOL_ADDRESS(map, ref) _dl_symbol_address(map, ref) + + Elf32_Addr _dl_lookup_address (const void *address); ++rtld_hidden_proto (_dl_lookup_address) + + #define DL_LOOKUP_ADDRESS(addr) _dl_lookup_address ((const void *) addr) + +diff --git a/sysdeps/hppa/dl-runtime.c b/sysdeps/hppa/dl-runtime.c +index e7fbb7417d866bb0..a71b5b2013abf723 100644 +--- a/sysdeps/hppa/dl-runtime.c ++++ b/sysdeps/hppa/dl-runtime.c +@@ -25,8 +25,7 @@ + return that to the caller. The caller will continue on to call + _dl_fixup with the relocation offset. */ + +-ElfW(Word) +-attribute_hidden __attribute ((noinline)) ARCH_FIXUP_ATTRIBUTE ++ElfW(Word) __attribute ((noinline)) ARCH_FIXUP_ATTRIBUTE + _dl_fix_reloc_arg (struct fdesc *fptr, struct link_map *l) + { + Elf32_Addr l_addr, iplt, jmprel, end_jmprel, r_type; +@@ -52,3 +51,4 @@ _dl_fix_reloc_arg (struct fdesc *fptr, struct link_map *l) + ABORT_INSTRUCTION; + return 0; + } ++rtld_hidden_def (_dl_fix_reloc_arg) +diff --git a/sysdeps/hppa/dl-runtime.h b/sysdeps/hppa/dl-runtime.h +index 5d6ee53b076d5e0e..9913539b5f0e7435 100644 +--- a/sysdeps/hppa/dl-runtime.h ++++ b/sysdeps/hppa/dl-runtime.h +@@ -17,6 +17,9 @@ + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + ++ElfW(Word) _dl_fix_reloc_arg (struct fdesc *, struct link_map *); ++rtld_hidden_proto (_dl_fix_reloc_arg) ++ + /* Clear PA_GP_RELOC bit in relocation offset. */ + static inline uintptr_t + reloc_offset (uintptr_t plt0, uintptr_t pltn) diff --git a/SOURCES/glibc-upstream-2.34-122.patch b/SOURCES/glibc-upstream-2.34-122.patch new file mode 100644 index 0000000..e628384 --- /dev/null +++ b/SOURCES/glibc-upstream-2.34-122.patch @@ -0,0 +1,26 @@ +commit b5032c3d37aa614644c7afbad33bb8226a52e6da +Author: Florian Weimer +Date: Mon Feb 28 11:50:41 2022 +0100 + + io: Add fsync call in tst-stat + + io/tst-stat and io/tst-stat-lfs fail sporadically on the Fedora + builders, and this change hopefully helps to avoid the issue. + + (cherry picked from commit ae132284092edc5885315b44cd17d5ea91177e49) + +diff --git a/io/tst-stat.c b/io/tst-stat.c +index 82e965de6ad87f61..be20cf16d70d05cc 100644 +--- a/io/tst-stat.c ++++ b/io/tst-stat.c +@@ -69,6 +69,10 @@ do_test (void) + TEST_VERIFY_EXIT (fd >= 0); + support_write_file_string (path, "abc"); + ++ /* This should help to prevent delayed allocation, which may result ++ in a spurious stx_blocks/st_blocks difference. */ ++ fsync (fd); ++ + bool check_ns = support_stat_nanoseconds (path); + if (!check_ns) + printf ("warning: timestamp with nanoseconds not supported\n"); diff --git a/SOURCES/glibc-upstream-2.34-123.patch b/SOURCES/glibc-upstream-2.34-123.patch new file mode 100644 index 0000000..22b1d2a --- /dev/null +++ b/SOURCES/glibc-upstream-2.34-123.patch @@ -0,0 +1,56 @@ +commit b53f0c11de409b04560a70570178d1a9d03d5860 +Author: Florian Weimer +Date: Fri Mar 11 08:23:56 2022 +0100 + + nss: Do not mention NSS test modules in + + They are not actually installed. 
Use the nss_files version instead + in nss/Makefile, similar to how __nss_shlib_revision is derived + from LIBNSS_FILES_SO. + + Reviewed-by: Carlos O'Donell + (cherry picked from commit aefc79ab5ad4bb9feea2876720cec70dca7cd8ed) + +diff --git a/nss/Makefile b/nss/Makefile +index bccf9f2806c15651..e223243d9d62041c 100644 +--- a/nss/Makefile ++++ b/nss/Makefile +@@ -171,17 +171,14 @@ $(objpfx)/libnss_test1.so: $(objpfx)nss_test1.os $(link-libc-deps) + $(objpfx)/libnss_test2.so: $(objpfx)nss_test2.os $(link-libc-deps) + $(build-module) + $(objpfx)nss_test2.os : nss_test1.c +-ifdef libnss_test1.so-version +-$(objpfx)/libnss_test1.so$(libnss_test1.so-version): $(objpfx)/libnss_test1.so ++# Use the nss_files suffix for these objects as well. ++$(objpfx)/libnss_test1.so$(libnss_files.so-version): $(objpfx)/libnss_test1.so + $(make-link) +-endif +-ifdef libnss_test2.so-version +-$(objpfx)/libnss_test2.so$(libnss_test2.so-version): $(objpfx)/libnss_test2.so ++$(objpfx)/libnss_test2.so$(libnss_files.so-version): $(objpfx)/libnss_test2.so + $(make-link) +-endif + $(patsubst %,$(objpfx)%.out,$(tests) $(tests-container)) : \ +- $(objpfx)/libnss_test1.so$(libnss_test1.so-version) \ +- $(objpfx)/libnss_test2.so$(libnss_test2.so-version) ++ $(objpfx)/libnss_test1.so$(libnss_files.so-version) \ ++ $(objpfx)/libnss_test2.so$(libnss_files.so-version) + + ifeq (yes,$(have-thread-library)) + $(objpfx)tst-cancel-getpwuid_r: $(shared-thread-library) +diff --git a/shlib-versions b/shlib-versions +index df6603e6992b8382..b87ab50c59af1bfd 100644 +--- a/shlib-versions ++++ b/shlib-versions +@@ -47,11 +47,6 @@ libnss_ldap=2 + libnss_hesiod=2 + libnss_db=2 + +-# Tests for NSS. They must have the same NSS_SHLIB_REVISION number as +-# the rest. +-libnss_test1=2 +-libnss_test2=2 +- + # Version for libnsl with YP and NIS+ functions. + libnsl=1 + diff --git a/SOURCES/glibc-upstream-2.34-124.patch b/SOURCES/glibc-upstream-2.34-124.patch new file mode 100644 index 0000000..0ee2830 --- /dev/null +++ b/SOURCES/glibc-upstream-2.34-124.patch @@ -0,0 +1,224 @@ +commit 54b12733959238204d7b0e46e69fc7f7d8890b20 +Author: Florian Weimer +Date: Fri Mar 11 08:23:56 2022 +0100 + + nss: Protect against errno changes in function lookup (bug 28953) + + dlopen may clobber errno. The nss_test_errno module uses an ELF + constructor to achieve that, but there could be internal errors + during dlopen that cause this, too. Therefore, the NSS framework + has to guard against such errno clobbers. + + __nss_module_get_function is currently the only function that calls + __nss_module_load, so it is sufficient to save and restore errno + around this call. 
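A standalone sketch of the guard pattern (module and symbol names here are arbitrary examples, not glibc internals; on glibc before 2.34 link with -ldl):

    #include <dlfcn.h>
    #include <errno.h>
    #include <stdio.h>

    static void *
    get_function_guarded (const char *module, const char *name)
    {
      int saved_errno = errno;  /* dlopen -- e.g. via an ELF constructor
                                   in the loaded module -- may clobber
                                   errno...  */
      void *handle = dlopen (module, RTLD_NOW);
      void *fn = handle == NULL ? NULL : dlsym (handle, name);
      errno = saved_errno;      /* ...so restore it on both paths.  */
      return fn;
    }

    int
    main (void)
    {
      errno = 0;
      void *fn = get_function_guarded ("libm.so.6", "cos");
      printf ("fn=%p, errno=%d (unchanged)\n", fn, errno);
      return 0;
    }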
+ + Reviewed-by: Carlos O'Donell + (cherry picked from commit 9bdf92c79d63b42f931101bb6df87129c408b0c4) + +diff --git a/nss/Makefile b/nss/Makefile +index e223243d9d62041c..716bc8f6ef5276b0 100644 +--- a/nss/Makefile ++++ b/nss/Makefile +@@ -60,7 +60,8 @@ tests = test-netdb test-digits-dots tst-nss-getpwent bug17079 \ + tst-nss-test1 \ + tst-nss-test2 \ + tst-nss-test4 \ +- tst-nss-test5 ++ tst-nss-test5 \ ++ tst-nss-test_errno + xtests = bug-erange + + tests-container = \ +@@ -132,7 +133,7 @@ libnss_compat-inhibit-o = $(filter-out .os,$(object-suffixes)) + ifeq ($(build-static-nss),yes) + tests-static += tst-nss-static + endif +-extra-test-objs += nss_test1.os nss_test2.os ++extra-test-objs += nss_test1.os nss_test2.os nss_test_errno.os + + include ../Rules + +@@ -166,19 +167,26 @@ rtld-tests-LDFLAGS += -Wl,--dynamic-list=nss_test.ver + + libof-nss_test1 = extramodules + libof-nss_test2 = extramodules ++libof-nss_test_errno = extramodules + $(objpfx)/libnss_test1.so: $(objpfx)nss_test1.os $(link-libc-deps) + $(build-module) + $(objpfx)/libnss_test2.so: $(objpfx)nss_test2.os $(link-libc-deps) + $(build-module) ++$(objpfx)/libnss_test_errno.so: $(objpfx)nss_test_errno.os $(link-libc-deps) ++ $(build-module) + $(objpfx)nss_test2.os : nss_test1.c + # Use the nss_files suffix for these objects as well. + $(objpfx)/libnss_test1.so$(libnss_files.so-version): $(objpfx)/libnss_test1.so + $(make-link) + $(objpfx)/libnss_test2.so$(libnss_files.so-version): $(objpfx)/libnss_test2.so + $(make-link) ++$(objpfx)/libnss_test_errno.so$(libnss_files.so-version): \ ++ $(objpfx)/libnss_test_errno.so ++ $(make-link) + $(patsubst %,$(objpfx)%.out,$(tests) $(tests-container)) : \ + $(objpfx)/libnss_test1.so$(libnss_files.so-version) \ +- $(objpfx)/libnss_test2.so$(libnss_files.so-version) ++ $(objpfx)/libnss_test2.so$(libnss_files.so-version) \ ++ $(objpfx)/libnss_test_errno.so$(libnss_files.so-version) + + ifeq (yes,$(have-thread-library)) + $(objpfx)tst-cancel-getpwuid_r: $(shared-thread-library) +diff --git a/nss/nss_module.c b/nss/nss_module.c +index b28cb94a6a0aeb41..3a4a464256121e41 100644 +--- a/nss/nss_module.c ++++ b/nss/nss_module.c +@@ -330,8 +330,18 @@ name_search (const void *left, const void *right) + void * + __nss_module_get_function (struct nss_module *module, const char *name) + { ++ /* A successful dlopen might clobber errno. */ ++ int saved_errno = errno; ++ + if (!__nss_module_load (module)) +- return NULL; ++ { ++ /* Reporting module load failure is currently inaccurate. See ++ bug 22041. Not changing errno is the conservative choice. */ ++ __set_errno (saved_errno); ++ return NULL; ++ } ++ ++ __set_errno (saved_errno); + + function_name *name_entry = bsearch (name, nss_function_name_array, + array_length (nss_function_name_array), +diff --git a/nss/nss_test_errno.c b/nss/nss_test_errno.c +new file mode 100644 +index 0000000000000000..680f8a07b97fe263 +--- /dev/null ++++ b/nss/nss_test_errno.c +@@ -0,0 +1,58 @@ ++/* NSS service provider with errno clobber. ++ Copyright (C) 2022 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. 
++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++#include ++#include ++ ++/* Catch misnamed and functions. */ ++#pragma GCC diagnostic error "-Wmissing-prototypes" ++NSS_DECLARE_MODULE_FUNCTIONS (test_errno) ++ ++static void __attribute__ ((constructor)) ++init (void) ++{ ++ /* An arbitrary error code which is otherwise not used. */ ++ errno = ELIBBAD; ++} ++ ++/* Lookup functions for pwd follow that do not return any data. */ ++ ++/* Catch misnamed function definitions. */ ++ ++enum nss_status ++_nss_test_errno_setpwent (int stayopen) ++{ ++ setenv ("_nss_test_errno_setpwent", "yes", 1); ++ return NSS_STATUS_SUCCESS; ++} ++ ++enum nss_status ++_nss_test_errno_getpwent_r (struct passwd *result, ++ char *buffer, size_t size, int *errnop) ++{ ++ setenv ("_nss_test_errno_getpwent_r", "yes", 1); ++ return NSS_STATUS_NOTFOUND; ++} ++ ++enum nss_status ++_nss_test_errno_endpwent (void) ++{ ++ setenv ("_nss_test_errno_endpwent", "yes", 1); ++ return NSS_STATUS_SUCCESS; ++} +diff --git a/nss/tst-nss-test_errno.c b/nss/tst-nss-test_errno.c +new file mode 100644 +index 0000000000000000..d2c42dd363a38b0e +--- /dev/null ++++ b/nss/tst-nss-test_errno.c +@@ -0,0 +1,61 @@ ++/* getpwent failure when dlopen clobbers errno (bug 28953). ++ Copyright (C) 2022 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . 
*/ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++static int ++do_test (void) ++{ ++ __nss_configure_lookup ("passwd", "files test_errno"); ++ ++ errno = 0; ++ setpwent (); ++ TEST_COMPARE (errno, 0); ++ ++ bool root_seen = false; ++ while (true) ++ { ++ errno = 0; ++ struct passwd *e = getpwent (); ++ if (e == NULL) ++ break; ++ if (strcmp (e->pw_name, "root")) ++ root_seen = true; ++ } ++ ++ TEST_COMPARE (errno, 0); ++ TEST_VERIFY (root_seen); ++ ++ errno = 0; ++ endpwent (); ++ TEST_COMPARE (errno, 0); ++ ++ TEST_COMPARE_STRING (getenv ("_nss_test_errno_setpwent"), "yes"); ++ TEST_COMPARE_STRING (getenv ("_nss_test_errno_getpwent_r"), "yes"); ++ TEST_COMPARE_STRING (getenv ("_nss_test_errno_endpwent"), "yes"); ++ ++ return 0; ++} ++ ++#include diff --git a/SOURCES/glibc-upstream-2.34-125.patch b/SOURCES/glibc-upstream-2.34-125.patch new file mode 100644 index 0000000..208da9b --- /dev/null +++ b/SOURCES/glibc-upstream-2.34-125.patch @@ -0,0 +1,345 @@ +commit c82bdf033f93a710044e25f721340c26e89a3769 +Author: Siddhesh Poyarekar +Date: Tue Oct 12 12:29:13 2021 +0530 + + Don't add access size hints to fortifiable functions + + In the context of a function definition, the size hints imply that the + size of an object pointed to by one parameter is another parameter. + This doesn't make sense for the fortified versions of the functions + since that's the bit it's trying to validate. + + This is harmless with __builtin_object_size since it has fairly simple + semantics when it comes to objects passed as function parameters. + With __builtin_dynamic_object_size we could (as my patchset for gcc[1] + already does) use the access attribute to determine the object size in + the general case but it misleads the fortified functions. + + Basically the problem occurs when access attributes are present on + regular functions that have inline fortified definitions to generate + _chk variants; the attributes get inherited by these definitions, + causing problems when analyzing them. For example with poll(fds, nfds, + timeout), nfds is hinted using the __attr_access as being the size of + fds. + + Now, when analyzing the inline function definition in bits/poll2.h, the + compiler sees that nfds is the size of fds and tries to use that + information in the function body. In _FORTIFY_SOURCE=3 case, where the + object size could be a non-constant expression, this information results + in the conclusion that nfds is the size of fds, which defeats the + purpose of the implementation because we're trying to check here if nfds + does indeed represent the size of fds. Hence for this case, it is best + to not have the access attribute. + + With the attributes gone, the expression evaluation should get delayed + until the function is actually inlined into its destinations. + + Disable the access attribute for fortified function inline functions + when building at _FORTIFY_SOURCE=3 to make this work better. The + access attributes remain for the _chk variants since they can be used + by the compiler to warn when the caller is passing invalid arguments. 
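The caller-side value of the retained attribute can be seen with a small compile-only sketch (the function name is invented; try gcc -O2 -Wall -c):

    #include <stddef.h>

    /* Declares that fill writes through parameter 1 and that
       parameter 2 is that buffer's element count.  */
    __attribute__ ((__access__ (__write_only__, 1, 2)))
    void fill (char *buf, size_t n);

    void
    caller (void)
    {
      char small[4];
      fill (small, 16);   /* GCC diagnoses this via -Wstringop-overflow:
                             writing 16 bytes into a region of size 4.  */
    }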
+ + [1] https://gcc.gnu.org/pipermail/gcc-patches/2021-October/581125.html + + Signed-off-by: Siddhesh Poyarekar + (cherry picked from commit e938c02748402c50f60ba0eb983273e7b52937d1) + +diff --git a/io/bits/poll2.h b/io/bits/poll2.h +index a623678c09f9f04f..be74d020f2e0e434 100644 +--- a/io/bits/poll2.h ++++ b/io/bits/poll2.h +@@ -33,7 +33,7 @@ extern int __REDIRECT (__poll_chk_warn, (struct pollfd *__fds, nfds_t __nfds, + __poll_chk) + __warnattr ("poll called with fds buffer too small file nfds entries"); + +-__fortify_function __attr_access ((__write_only__, 1, 2)) int ++__fortify_function __fortified_attr_access (__write_only__, 1, 2) int + poll (struct pollfd *__fds, nfds_t __nfds, int __timeout) + { + if (__glibc_objsize (__fds) != (__SIZE_TYPE__) -1) +@@ -64,7 +64,7 @@ extern int __REDIRECT (__ppoll_chk_warn, (struct pollfd *__fds, nfds_t __nfds, + __ppoll_chk) + __warnattr ("ppoll called with fds buffer too small file nfds entries"); + +-__fortify_function __attr_access ((__write_only__, 1, 2)) int ++__fortify_function __fortified_attr_access (__write_only__, 1, 2) int + ppoll (struct pollfd *__fds, nfds_t __nfds, const struct timespec *__timeout, + const __sigset_t *__ss) + { +diff --git a/io/sys/poll.h b/io/sys/poll.h +index e640efb2bce7ea67..751c7f5f72db8be2 100644 +--- a/io/sys/poll.h ++++ b/io/sys/poll.h +@@ -52,7 +52,7 @@ __BEGIN_DECLS + This function is a cancellation point and therefore not marked with + __THROW. */ + extern int poll (struct pollfd *__fds, nfds_t __nfds, int __timeout) +- __attr_access ((__write_only__, 1, 2)); ++ __fortified_attr_access (__write_only__, 1, 2); + + #ifdef __USE_GNU + /* Like poll, but before waiting the threads signal mask is replaced +@@ -64,7 +64,7 @@ extern int poll (struct pollfd *__fds, nfds_t __nfds, int __timeout) + extern int ppoll (struct pollfd *__fds, nfds_t __nfds, + const struct timespec *__timeout, + const __sigset_t *__ss) +- __attr_access ((__write_only__, 1, 2)); ++ __fortified_attr_access (__write_only__, 1, 2); + + # ifdef __USE_TIME_BITS64 + # ifdef __REDIRECT +@@ -72,7 +72,7 @@ extern int __REDIRECT (ppoll, (struct pollfd *__fds, nfds_t __nfds, + const struct timespec *__timeout, + const __sigset_t *__ss), + __ppoll64) +- __attr_access ((__write_only__, 1, 2)); ++ __fortified_attr_access (__write_only__, 1, 2); + # else + # define ppoll __ppoll64 + # endif +diff --git a/libio/bits/stdio2.h b/libio/bits/stdio2.h +index 3f0cab1254b02c43..4f016a563857a137 100644 +--- a/libio/bits/stdio2.h ++++ b/libio/bits/stdio2.h +@@ -258,7 +258,7 @@ extern char *__REDIRECT (__fgets_chk_warn, + __wur __warnattr ("fgets called with bigger size than length " + "of destination buffer"); + +-__fortify_function __wur __attr_access ((__write_only__, 1, 2)) char * ++__fortify_function __wur __fortified_attr_access (__write_only__, 1, 2) char * + fgets (char *__restrict __s, int __n, FILE *__restrict __stream) + { + if (__glibc_objsize (__s) != (size_t) -1) +@@ -320,7 +320,7 @@ extern char *__REDIRECT (__fgets_unlocked_chk_warn, + __wur __warnattr ("fgets_unlocked called with bigger size than length " + "of destination buffer"); + +-__fortify_function __wur __attr_access ((__write_only__, 1, 2)) char * ++__fortify_function __wur __fortified_attr_access (__write_only__, 1, 2) char * + fgets_unlocked (char *__restrict __s, int __n, FILE *__restrict __stream) + { + if (__glibc_objsize (__s) != (size_t) -1) +diff --git a/libio/stdio.h b/libio/stdio.h +index 497da016ffa2e230..abefe640e52d18d5 100644 +--- a/libio/stdio.h ++++ b/libio/stdio.h +@@ 
-584,7 +584,7 @@ extern int putw (int __w, FILE *__stream); + This function is a possible cancellation point and therefore not + marked with __THROW. */ + extern char *fgets (char *__restrict __s, int __n, FILE *__restrict __stream) +- __wur __attr_access ((__write_only__, 1, 2)); ++ __wur __fortified_attr_access (__write_only__, 1, 2); + + #if __GLIBC_USE (DEPRECATED_GETS) + /* Get a newline-terminated string from stdin, removing the newline. +@@ -608,7 +608,7 @@ extern char *gets (char *__s) __wur __attribute_deprecated__; + therefore not marked with __THROW. */ + extern char *fgets_unlocked (char *__restrict __s, int __n, + FILE *__restrict __stream) __wur +- __attr_access ((__write_only__, 1, 2)); ++ __fortified_attr_access (__write_only__, 1, 2); + #endif + + +diff --git a/misc/sys/cdefs.h b/misc/sys/cdefs.h +index e490fc1aebeadc3d..cd836441a9807d6a 100644 +--- a/misc/sys/cdefs.h ++++ b/misc/sys/cdefs.h +@@ -603,12 +603,22 @@ _Static_assert (0, "IEEE 128-bits long double requires redirection on this platf + size-index is not provided: + access (access-mode, [, ]) */ + # define __attr_access(x) __attribute__ ((__access__ x)) ++/* For _FORTIFY_SOURCE == 3 we use __builtin_dynamic_object_size, which may ++ use the access attribute to get object sizes from function definition ++ arguments, so we can't use them on functions we fortify. Drop the object ++ size hints for such functions. */ ++# if __USE_FORTIFY_LEVEL == 3 ++# define __fortified_attr_access(a, o, s) __attribute__ ((__access__ (a, o))) ++# else ++# define __fortified_attr_access(a, o, s) __attr_access ((a, o, s)) ++# endif + # if __GNUC_PREREQ (11, 0) + # define __attr_access_none(argno) __attribute__ ((__access__ (__none__, argno))) + # else + # define __attr_access_none(argno) + # endif + #else ++# define __fortified_attr_access(a, o, s) + # define __attr_access(x) + # define __attr_access_none(argno) + #endif +diff --git a/posix/unistd.h b/posix/unistd.h +index 8224c5fbc956306f..7a61ff5e868c3456 100644 +--- a/posix/unistd.h ++++ b/posix/unistd.h +@@ -369,7 +369,7 @@ extern void closefrom (int __lowfd) __THROW; + This function is a cancellation point and therefore not marked with + __THROW. */ + extern ssize_t read (int __fd, void *__buf, size_t __nbytes) __wur +- __attr_access ((__write_only__, 2, 3)); ++ __fortified_attr_access (__write_only__, 2, 3); + + /* Write N bytes of BUF to FD. Return the number written, or -1. + +@@ -388,7 +388,7 @@ extern ssize_t write (int __fd, const void *__buf, size_t __n) __wur + __THROW. */ + extern ssize_t pread (int __fd, void *__buf, size_t __nbytes, + __off_t __offset) __wur +- __attr_access ((__write_only__, 2, 3)); ++ __fortified_attr_access (__write_only__, 2, 3); + + /* Write N bytes of BUF to FD at the given position OFFSET without + changing the file pointer. Return the number written, or -1. +@@ -404,7 +404,7 @@ extern ssize_t pwrite (int __fd, const void *__buf, size_t __n, + extern ssize_t __REDIRECT (pread, (int __fd, void *__buf, size_t __nbytes, + __off64_t __offset), + pread64) __wur +- __attr_access ((__write_only__, 2, 3)); ++ __fortified_attr_access (__write_only__, 2, 3); + extern ssize_t __REDIRECT (pwrite, (int __fd, const void *__buf, + size_t __nbytes, __off64_t __offset), + pwrite64) __wur +@@ -421,7 +421,7 @@ extern ssize_t __REDIRECT (pwrite, (int __fd, const void *__buf, + or 0 for EOF. 
*/ + extern ssize_t pread64 (int __fd, void *__buf, size_t __nbytes, + __off64_t __offset) __wur +- __attr_access ((__write_only__, 2, 3)); ++ __fortified_attr_access (__write_only__, 2, 3); + /* Write N bytes of BUF to FD at the given position OFFSET without + changing the file pointer. Return the number written, or -1. */ + extern ssize_t pwrite64 (int __fd, const void *__buf, size_t __n, +@@ -642,7 +642,7 @@ extern long int sysconf (int __name) __THROW; + #ifdef __USE_POSIX2 + /* Get the value of the string-valued system variable NAME. */ + extern size_t confstr (int __name, char *__buf, size_t __len) __THROW +- __attr_access ((__write_only__, 2, 3)); ++ __fortified_attr_access (__write_only__, 2, 3); + #endif + + +@@ -709,7 +709,7 @@ extern __gid_t getegid (void) __THROW; + the calling process is in. Otherwise, fill in the group IDs + of its supplementary groups in LIST and return the number written. */ + extern int getgroups (int __size, __gid_t __list[]) __THROW __wur +- __attr_access ((__write_only__, 2, 1)); ++ __fortified_attr_access (__write_only__, 2, 1); + #ifdef __USE_GNU + /* Return nonzero iff the calling process is in group GID. */ + extern int group_member (__gid_t __gid) __THROW; +@@ -801,7 +801,8 @@ extern char *ttyname (int __fd) __THROW; + /* Store at most BUFLEN characters of the pathname of the terminal FD is + open on in BUF. Return 0 on success, otherwise an error number. */ + extern int ttyname_r (int __fd, char *__buf, size_t __buflen) +- __THROW __nonnull ((2)) __wur __attr_access ((__write_only__, 2, 3)); ++ __THROW __nonnull ((2)) __wur ++ __fortified_attr_access (__write_only__, 2, 3); + + /* Return 1 if FD is a valid descriptor associated + with a terminal, zero if not. */ +@@ -836,7 +837,8 @@ extern int symlink (const char *__from, const char *__to) + Returns the number of characters read, or -1 for errors. */ + extern ssize_t readlink (const char *__restrict __path, + char *__restrict __buf, size_t __len) +- __THROW __nonnull ((1, 2)) __wur __attr_access ((__write_only__, 2, 3)); ++ __THROW __nonnull ((1, 2)) __wur ++ __fortified_attr_access (__write_only__, 2, 3); + + #endif /* Use POSIX.1-2001. */ + +@@ -848,7 +850,8 @@ extern int symlinkat (const char *__from, int __tofd, + /* Like readlink but a relative PATH is interpreted relative to FD. */ + extern ssize_t readlinkat (int __fd, const char *__restrict __path, + char *__restrict __buf, size_t __len) +- __THROW __nonnull ((2, 3)) __wur __attr_access ((__write_only__, 3, 4)); ++ __THROW __nonnull ((2, 3)) __wur ++ __fortified_attr_access (__write_only__, 3, 4); + #endif + + /* Remove the link NAME. */ +@@ -884,7 +887,7 @@ extern char *getlogin (void); + This function is a possible cancellation point and therefore not + marked with __THROW. */ + extern int getlogin_r (char *__name, size_t __name_len) __nonnull ((1)) +- __attr_access ((__write_only__, 1, 2)); ++ __fortified_attr_access (__write_only__, 1, 2); + #endif + + #ifdef __USE_MISC +@@ -906,7 +909,7 @@ extern int setlogin (const char *__name) __THROW __nonnull ((1)); + The result is null-terminated if LEN is large enough for the full + name and the terminator. */ + extern int gethostname (char *__name, size_t __len) __THROW __nonnull ((1)) +- __attr_access ((__write_only__, 1, 2)); ++ __fortified_attr_access (__write_only__, 1, 2); + #endif + + +@@ -925,7 +928,8 @@ extern int sethostid (long int __id) __THROW __wur; + Called just like `gethostname' and `sethostname'. + The NIS domain name is usually the empty string when not using NIS. 
*/ + extern int getdomainname (char *__name, size_t __len) +- __THROW __nonnull ((1)) __wur __attr_access ((__write_only__, 1, 2)); ++ __THROW __nonnull ((1)) __wur ++ __fortified_attr_access (__write_only__, 1, 2); + extern int setdomainname (const char *__name, size_t __len) + __THROW __nonnull ((1)) __wur __attr_access ((__read_only__, 1, 2)); + +diff --git a/stdlib/stdlib.h b/stdlib/stdlib.h +index 0481c1235514f6e7..74c00eee73e4009d 100644 +--- a/stdlib/stdlib.h ++++ b/stdlib/stdlib.h +@@ -943,7 +943,8 @@ extern size_t mbstowcs (wchar_t *__restrict __pwcs, + extern size_t wcstombs (char *__restrict __s, + const wchar_t *__restrict __pwcs, size_t __n) + __THROW +- __attr_access ((__write_only__, 1, 3)) __attr_access ((__read_only__, 2)); ++ __fortified_attr_access (__write_only__, 1, 3) ++ __attr_access ((__read_only__, 2)); + + #ifdef __USE_MISC + /* Determine whether the string value of RESPONSE matches the affirmation +@@ -997,7 +998,7 @@ extern char *ptsname (int __fd) __THROW __wur; + terminal associated with the master FD is open on in BUF. + Return 0 on success, otherwise an error number. */ + extern int ptsname_r (int __fd, char *__buf, size_t __buflen) +- __THROW __nonnull ((2)) __attr_access ((__write_only__, 2, 3)); ++ __THROW __nonnull ((2)) __fortified_attr_access (__write_only__, 2, 3); + + /* Open a master pseudo terminal and return its file descriptor. */ + extern int getpt (void); +diff --git a/string/bits/string_fortified.h b/string/bits/string_fortified.h +index 67ae2c6b50435368..5731274848260ad2 100644 +--- a/string/bits/string_fortified.h ++++ b/string/bits/string_fortified.h +@@ -64,7 +64,7 @@ __NTH (memset (void *__dest, int __ch, size_t __len)) + # include + + void __explicit_bzero_chk (void *__dest, size_t __len, size_t __destlen) +- __THROW __nonnull ((1)) __attr_access ((__write_only__, 1, 2)); ++ __THROW __nonnull ((1)) __fortified_attr_access (__write_only__, 1, 2); + + __fortify_function void + __NTH (explicit_bzero (void *__dest, size_t __len)) +@@ -106,7 +106,8 @@ __NTH (stpncpy (char *__dest, const char *__src, size_t __n)) + #else + extern char *__stpncpy_chk (char *__dest, const char *__src, size_t __n, + size_t __destlen) __THROW +- __attr_access ((__write_only__, 1, 3)) __attr_access ((__read_only__, 2)); ++ __fortified_attr_access ((__write_only__, 1, 3)) ++ __attr_access ((__read_only__, 2)); + extern char *__REDIRECT_NTH (__stpncpy_alias, (char *__dest, const char *__src, + size_t __n), stpncpy); + +diff --git a/string/string.h b/string/string.h +index 04e1b7067dc31d3c..8dcafb4ac4952853 100644 +--- a/string/string.h ++++ b/string/string.h +@@ -448,7 +448,7 @@ extern char *strerror_l (int __errnum, locale_t __l) __THROW; + /* Set N bytes of S to 0. The compiler will not delete a call to this + function, even if S is dead after the call. */ + extern void explicit_bzero (void *__s, size_t __n) __THROW __nonnull ((1)) +- __attr_access ((__write_only__, 1, 2)); ++ __fortified_attr_access (__write_only__, 1, 2); + + /* Return the next DELIM-delimited token from *STRINGP, + terminating it with a '\0', and update *STRINGP to point past it. 
*/ diff --git a/SOURCES/glibc-upstream-2.34-126.patch b/SOURCES/glibc-upstream-2.34-126.patch new file mode 100644 index 0000000..93d236d --- /dev/null +++ b/SOURCES/glibc-upstream-2.34-126.patch @@ -0,0 +1,1038 @@ +commit 0e6ebf06e43e879d9efa2a1c79b2a55f81653663 +Author: Siddhesh Poyarekar +Date: Wed Oct 20 18:12:41 2021 +0530 + + Make sure that the fortified function conditionals are constant + + In _FORTIFY_SOURCE=3, the size expression may be non-constant, + resulting in branches in the inline functions remaining intact and + causing a tiny overhead. Clang (and in future, gcc) make sure that + the -1 case is always safe, i.e. any comparison of the generated + expression with (size_t)-1 is always false so that bit is taken care + of. The rest is avoidable since we want the _chk variant whenever we + have a size expression and it's not -1. + + Rework the conditionals in a uniform way to clearly indicate two + conditions at compile time: + + - Either the size is unknown (-1) or we know at compile time that the + operation length is less than the object size. We can call the + original function in this case. It could be that either the length, + object size or both are non-constant, but the compiler, through + range analysis, is able to fold the *comparison* to a constant. + + - The size and length are known and the compiler can see at compile + time that operation length > object size. This is valid grounds for + a warning at compile time, followed by emitting the _chk variant. + + For everything else, emit the _chk variant. + + This simplifies most of the fortified function implementations and at + the same time, ensures that only one call from _chk or the regular + function is emitted. + + Signed-off-by: Siddhesh Poyarekar + Reviewed-by: Adhemerval Zanella + (cherry picked from commit a643f60c53876be0d57b4b7373770e6cb356fd13) + +diff --git a/io/bits/poll2.h b/io/bits/poll2.h +index be74d020f2e0e434..91cdcaf66a427cea 100644 +--- a/io/bits/poll2.h ++++ b/io/bits/poll2.h +@@ -36,16 +36,9 @@ extern int __REDIRECT (__poll_chk_warn, (struct pollfd *__fds, nfds_t __nfds, + __fortify_function __fortified_attr_access (__write_only__, 1, 2) int + poll (struct pollfd *__fds, nfds_t __nfds, int __timeout) + { +- if (__glibc_objsize (__fds) != (__SIZE_TYPE__) -1) +- { +- if (! __builtin_constant_p (__nfds)) +- return __poll_chk (__fds, __nfds, __timeout, __glibc_objsize (__fds)); +- else if (__glibc_objsize (__fds) / sizeof (*__fds) < __nfds) +- return __poll_chk_warn (__fds, __nfds, __timeout, +- __glibc_objsize (__fds)); +- } +- +- return __poll_alias (__fds, __nfds, __timeout); ++ return __glibc_fortify (poll, __nfds, sizeof (*__fds), ++ __glibc_objsize (__fds), ++ __fds, __nfds, __timeout); + } + + +@@ -68,17 +61,9 @@ __fortify_function __fortified_attr_access (__write_only__, 1, 2) int + ppoll (struct pollfd *__fds, nfds_t __nfds, const struct timespec *__timeout, + const __sigset_t *__ss) + { +- if (__glibc_objsize (__fds) != (__SIZE_TYPE__) -1) +- { +- if (! 
__builtin_constant_p (__nfds)) +- return __ppoll_chk (__fds, __nfds, __timeout, __ss, +- __glibc_objsize (__fds)); +- else if (__glibc_objsize (__fds) / sizeof (*__fds) < __nfds) +- return __ppoll_chk_warn (__fds, __nfds, __timeout, __ss, +- __glibc_objsize (__fds)); +- } +- +- return __ppoll_alias (__fds, __nfds, __timeout, __ss); ++ return __glibc_fortify (ppoll, __nfds, sizeof (*__fds), ++ __glibc_objsize (__fds), ++ __fds, __nfds, __timeout, __ss); + } + #endif + +diff --git a/libio/bits/stdio2.h b/libio/bits/stdio2.h +index 4f016a563857a137..40ff16b01b4f4876 100644 +--- a/libio/bits/stdio2.h ++++ b/libio/bits/stdio2.h +@@ -261,15 +261,12 @@ extern char *__REDIRECT (__fgets_chk_warn, + __fortify_function __wur __fortified_attr_access (__write_only__, 1, 2) char * + fgets (char *__restrict __s, int __n, FILE *__restrict __stream) + { +- if (__glibc_objsize (__s) != (size_t) -1) +- { +- if (!__builtin_constant_p (__n) || __n <= 0) +- return __fgets_chk (__s, __glibc_objsize (__s), __n, __stream); +- +- if ((size_t) __n > __glibc_objsize (__s)) +- return __fgets_chk_warn (__s, __glibc_objsize (__s), __n, __stream); +- } +- return __fgets_alias (__s, __n, __stream); ++ size_t sz = __glibc_objsize (__s); ++ if (__glibc_safe_or_unknown_len (__n, sizeof (char), sz)) ++ return __fgets_alias (__s, __n, __stream); ++ if (__glibc_unsafe_len (__n, sizeof (char), sz)) ++ return __fgets_chk_warn (__s, sz, __n, __stream); ++ return __fgets_chk (__s, sz, __n, __stream); + } + + extern size_t __fread_chk (void *__restrict __ptr, size_t __ptrlen, +@@ -291,19 +288,12 @@ __fortify_function __wur size_t + fread (void *__restrict __ptr, size_t __size, size_t __n, + FILE *__restrict __stream) + { +- if (__glibc_objsize0 (__ptr) != (size_t) -1) +- { +- if (!__builtin_constant_p (__size) +- || !__builtin_constant_p (__n) +- || (__size | __n) >= (((size_t) 1) << (8 * sizeof (size_t) / 2))) +- return __fread_chk (__ptr, __glibc_objsize0 (__ptr), __size, __n, +- __stream); +- +- if (__size * __n > __glibc_objsize0 (__ptr)) +- return __fread_chk_warn (__ptr, __glibc_objsize0 (__ptr), __size, __n, +- __stream); +- } +- return __fread_alias (__ptr, __size, __n, __stream); ++ size_t sz = __glibc_objsize0 (__ptr); ++ if (__glibc_safe_or_unknown_len (__n, __size, sz)) ++ return __fread_alias (__ptr, __size, __n, __stream); ++ if (__glibc_unsafe_len (__n, __size, sz)) ++ return __fread_chk_warn (__ptr, sz, __size, __n, __stream); ++ return __fread_chk (__ptr, sz, __size, __n, __stream); + } + + #ifdef __USE_GNU +@@ -323,17 +313,12 @@ extern char *__REDIRECT (__fgets_unlocked_chk_warn, + __fortify_function __wur __fortified_attr_access (__write_only__, 1, 2) char * + fgets_unlocked (char *__restrict __s, int __n, FILE *__restrict __stream) + { +- if (__glibc_objsize (__s) != (size_t) -1) +- { +- if (!__builtin_constant_p (__n) || __n <= 0) +- return __fgets_unlocked_chk (__s, __glibc_objsize (__s), __n, +- __stream); +- +- if ((size_t) __n > __glibc_objsize (__s)) +- return __fgets_unlocked_chk_warn (__s, __glibc_objsize (__s), __n, +- __stream); +- } +- return __fgets_unlocked_alias (__s, __n, __stream); ++ size_t sz = __glibc_objsize (__s); ++ if (__glibc_safe_or_unknown_len (__n, sizeof (char), sz)) ++ return __fgets_unlocked_alias (__s, __n, __stream); ++ if (__glibc_unsafe_len (__n, sizeof (char), sz)) ++ return __fgets_unlocked_chk_warn (__s, sz, __n, __stream); ++ return __fgets_unlocked_chk (__s, sz, __n, __stream); + } + #endif + +@@ -358,41 +343,36 @@ __fortify_function __wur size_t + fread_unlocked (void 
*__restrict __ptr, size_t __size, size_t __n, + FILE *__restrict __stream) + { +- if (__glibc_objsize0 (__ptr) != (size_t) -1) ++ size_t sz = __glibc_objsize0 (__ptr); ++ if (__glibc_safe_or_unknown_len (__n, __size, sz)) + { +- if (!__builtin_constant_p (__size) +- || !__builtin_constant_p (__n) +- || (__size | __n) >= (((size_t) 1) << (8 * sizeof (size_t) / 2))) +- return __fread_unlocked_chk (__ptr, __glibc_objsize0 (__ptr), __size, +- __n, __stream); +- +- if (__size * __n > __glibc_objsize0 (__ptr)) +- return __fread_unlocked_chk_warn (__ptr, __glibc_objsize0 (__ptr), +- __size, __n, __stream); +- } +- + # ifdef __USE_EXTERN_INLINES +- if (__builtin_constant_p (__size) +- && __builtin_constant_p (__n) +- && (__size | __n) < (((size_t) 1) << (8 * sizeof (size_t) / 2)) +- && __size * __n <= 8) +- { +- size_t __cnt = __size * __n; +- char *__cptr = (char *) __ptr; +- if (__cnt == 0) +- return 0; +- +- for (; __cnt > 0; --__cnt) ++ if (__builtin_constant_p (__size) ++ && __builtin_constant_p (__n) ++ && (__size | __n) < (((size_t) 1) << (8 * sizeof (size_t) / 2)) ++ && __size * __n <= 8) + { +- int __c = getc_unlocked (__stream); +- if (__c == EOF) +- break; +- *__cptr++ = __c; ++ size_t __cnt = __size * __n; ++ char *__cptr = (char *) __ptr; ++ if (__cnt == 0) ++ return 0; ++ ++ for (; __cnt > 0; --__cnt) ++ { ++ int __c = getc_unlocked (__stream); ++ if (__c == EOF) ++ break; ++ *__cptr++ = __c; ++ } ++ return (__cptr - (char *) __ptr) / __size; + } +- return (__cptr - (char *) __ptr) / __size; +- } + # endif +- return __fread_unlocked_alias (__ptr, __size, __n, __stream); ++ return __fread_unlocked_alias (__ptr, __size, __n, __stream); ++ } ++ if (__glibc_unsafe_len (__n, __size, sz)) ++ return __fread_unlocked_chk_warn (__ptr, sz, __size, __n, __stream); ++ return __fread_unlocked_chk (__ptr, sz, __size, __n, __stream); ++ + } + #endif + +diff --git a/misc/sys/cdefs.h b/misc/sys/cdefs.h +index cd836441a9807d6a..4825ff0351c1e5d4 100644 +--- a/misc/sys/cdefs.h ++++ b/misc/sys/cdefs.h +@@ -150,6 +150,53 @@ + # define __glibc_objsize(__o) __bos (__o) + #endif + ++/* Compile time conditions to choose between the regular, _chk and _chk_warn ++ variants. These conditions should get evaluated to constant and optimized ++ away. */ ++ ++#define __glibc_safe_len_cond(__l, __s, __osz) ((__l) <= (__osz) / (__s)) ++#define __glibc_unsigned_or_positive(__l) \ ++ ((__typeof (__l)) 0 < (__typeof (__l)) -1 \ ++ || (__builtin_constant_p (__l) && (__l) > 0)) ++ ++/* Length is known to be safe at compile time if the __L * __S <= __OBJSZ ++ condition can be folded to a constant and if it is true. The -1 check is ++ redundant because since it implies that __glibc_safe_len_cond is true. */ ++#define __glibc_safe_or_unknown_len(__l, __s, __osz) \ ++ (__glibc_unsigned_or_positive (__l) \ ++ && __builtin_constant_p (__glibc_safe_len_cond ((__SIZE_TYPE__) (__l), \ ++ __s, __osz)) \ ++ && __glibc_safe_len_cond ((__SIZE_TYPE__) (__l), __s, __osz)) ++ ++/* Conversely, we know at compile time that the length is safe if the ++ __L * __S <= __OBJSZ condition can be folded to a constant and if it is ++ false. */ ++#define __glibc_unsafe_len(__l, __s, __osz) \ ++ (__glibc_unsigned_or_positive (__l) \ ++ && __builtin_constant_p (__glibc_safe_len_cond ((__SIZE_TYPE__) (__l), \ ++ __s, __osz)) \ ++ && !__glibc_safe_len_cond ((__SIZE_TYPE__) (__l), __s, __osz)) ++ ++/* Fortify function f. __f_alias, __f_chk and __f_chk_warn must be ++ declared. */ ++ ++#define __glibc_fortify(f, __l, __s, __osz, ...) 
\ ++ (__glibc_safe_or_unknown_len (__l, __s, __osz) \ ++ ? __ ## f ## _alias (__VA_ARGS__) \ ++ : (__glibc_unsafe_len (__l, __s, __osz) \ ++ ? __ ## f ## _chk_warn (__VA_ARGS__, __osz) \ ++ : __ ## f ## _chk (__VA_ARGS__, __osz))) \ ++ ++/* Fortify function f, where object size argument passed to f is the number of ++ elements and not total size. */ ++ ++#define __glibc_fortify_n(f, __l, __s, __osz, ...) \ ++ (__glibc_safe_or_unknown_len (__l, __s, __osz) \ ++ ? __ ## f ## _alias (__VA_ARGS__) \ ++ : (__glibc_unsafe_len (__l, __s, __osz) \ ++ ? __ ## f ## _chk_warn (__VA_ARGS__, (__osz) / (__s)) \ ++ : __ ## f ## _chk (__VA_ARGS__, (__osz) / (__s)))) \ ++ + #if __GNUC_PREREQ (4,3) + # define __warnattr(msg) __attribute__((__warning__ (msg))) + # define __errordecl(name, msg) \ +diff --git a/posix/bits/unistd.h b/posix/bits/unistd.h +index 622adeb2b28ed298..697dcbbf7b4b26f6 100644 +--- a/posix/bits/unistd.h ++++ b/posix/bits/unistd.h +@@ -35,16 +35,9 @@ extern ssize_t __REDIRECT (__read_chk_warn, + __fortify_function __wur ssize_t + read (int __fd, void *__buf, size_t __nbytes) + { +- if (__glibc_objsize0 (__buf) != (size_t) -1) +- { +- if (!__builtin_constant_p (__nbytes)) +- return __read_chk (__fd, __buf, __nbytes, __glibc_objsize0 (__buf)); +- +- if (__nbytes > __glibc_objsize0 (__buf)) +- return __read_chk_warn (__fd, __buf, __nbytes, +- __glibc_objsize0 (__buf)); +- } +- return __read_alias (__fd, __buf, __nbytes); ++ return __glibc_fortify (read, __nbytes, sizeof (char), ++ __glibc_objsize0 (__buf), ++ __fd, __buf, __nbytes); + } + + #ifdef __USE_UNIX98 +@@ -78,34 +71,17 @@ extern ssize_t __REDIRECT (__pread64_chk_warn, + __fortify_function __wur ssize_t + pread (int __fd, void *__buf, size_t __nbytes, __off_t __offset) + { +- if (__glibc_objsize0 (__buf) != (size_t) -1) +- { +- if (!__builtin_constant_p (__nbytes)) +- return __pread_chk (__fd, __buf, __nbytes, __offset, +- __glibc_objsize0 (__buf)); +- +- if ( __nbytes > __glibc_objsize0 (__buf)) +- return __pread_chk_warn (__fd, __buf, __nbytes, __offset, +- __glibc_objsize0 (__buf)); +- } +- return __pread_alias (__fd, __buf, __nbytes, __offset); ++ return __glibc_fortify (pread, __nbytes, sizeof (char), ++ __glibc_objsize0 (__buf), ++ __fd, __buf, __nbytes, __offset); + } + # else + __fortify_function __wur ssize_t + pread (int __fd, void *__buf, size_t __nbytes, __off64_t __offset) + { +- if (__glibc_objsize0 (__buf) != (size_t) -1) +- { +- if (!__builtin_constant_p (__nbytes)) +- return __pread64_chk (__fd, __buf, __nbytes, __offset, +- __glibc_objsize0 (__buf)); +- +- if ( __nbytes > __glibc_objsize0 (__buf)) +- return __pread64_chk_warn (__fd, __buf, __nbytes, __offset, +- __glibc_objsize0 (__buf)); +- } +- +- return __pread64_alias (__fd, __buf, __nbytes, __offset); ++ return __glibc_fortify (pread64, __nbytes, sizeof (char), ++ __glibc_objsize0 (__buf), ++ __fd, __buf, __nbytes, __offset); + } + # endif + +@@ -113,18 +89,9 @@ pread (int __fd, void *__buf, size_t __nbytes, __off64_t __offset) + __fortify_function __wur ssize_t + pread64 (int __fd, void *__buf, size_t __nbytes, __off64_t __offset) + { +- if (__glibc_objsize0 (__buf) != (size_t) -1) +- { +- if (!__builtin_constant_p (__nbytes)) +- return __pread64_chk (__fd, __buf, __nbytes, __offset, +- __glibc_objsize0 (__buf)); +- +- if ( __nbytes > __glibc_objsize0 (__buf)) +- return __pread64_chk_warn (__fd, __buf, __nbytes, __offset, +- __glibc_objsize0 (__buf)); +- } +- +- return __pread64_alias (__fd, __buf, __nbytes, __offset); ++ return __glibc_fortify (pread64, 
__nbytes, sizeof (char), ++ __glibc_objsize0 (__buf), ++ __fd, __buf, __nbytes, __offset); + } + # endif + #endif +@@ -149,16 +116,9 @@ __fortify_function __nonnull ((1, 2)) __wur ssize_t + __NTH (readlink (const char *__restrict __path, char *__restrict __buf, + size_t __len)) + { +- if (__glibc_objsize (__buf) != (size_t) -1) +- { +- if (!__builtin_constant_p (__len)) +- return __readlink_chk (__path, __buf, __len, __glibc_objsize (__buf)); +- +- if ( __len > __glibc_objsize (__buf)) +- return __readlink_chk_warn (__path, __buf, __len, +- __glibc_objsize (__buf)); +- } +- return __readlink_alias (__path, __buf, __len); ++ return __glibc_fortify (readlink, __len, sizeof (char), ++ __glibc_objsize (__buf), ++ __path, __buf, __len); + } + #endif + +@@ -184,17 +144,9 @@ __fortify_function __nonnull ((2, 3)) __wur ssize_t + __NTH (readlinkat (int __fd, const char *__restrict __path, + char *__restrict __buf, size_t __len)) + { +- if (__glibc_objsize (__buf) != (size_t) -1) +- { +- if (!__builtin_constant_p (__len)) +- return __readlinkat_chk (__fd, __path, __buf, __len, +- __glibc_objsize (__buf)); +- +- if (__len > __glibc_objsize (__buf)) +- return __readlinkat_chk_warn (__fd, __path, __buf, __len, +- __glibc_objsize (__buf)); +- } +- return __readlinkat_alias (__fd, __path, __buf, __len); ++ return __glibc_fortify (readlinkat, __len, sizeof (char), ++ __glibc_objsize (__buf), ++ __fd, __path, __buf, __len); + } + #endif + +@@ -211,15 +163,9 @@ extern char *__REDIRECT_NTH (__getcwd_chk_warn, + __fortify_function __wur char * + __NTH (getcwd (char *__buf, size_t __size)) + { +- if (__glibc_objsize (__buf) != (size_t) -1) +- { +- if (!__builtin_constant_p (__size)) +- return __getcwd_chk (__buf, __size, __glibc_objsize (__buf)); +- +- if (__size > __glibc_objsize (__buf)) +- return __getcwd_chk_warn (__buf, __size, __glibc_objsize (__buf)); +- } +- return __getcwd_alias (__buf, __size); ++ return __glibc_fortify (getcwd, __size, sizeof (char), ++ __glibc_objsize (__buf), ++ __buf, __size); + } + + #if defined __USE_MISC || defined __USE_XOPEN_EXTENDED +@@ -253,16 +199,9 @@ extern size_t __REDIRECT_NTH (__confstr_chk_warn, + __fortify_function size_t + __NTH (confstr (int __name, char *__buf, size_t __len)) + { +- if (__glibc_objsize (__buf) != (size_t) -1) +- { +- if (!__builtin_constant_p (__len)) +- return __confstr_chk (__name, __buf, __len, __glibc_objsize (__buf)); +- +- if (__glibc_objsize (__buf) < __len) +- return __confstr_chk_warn (__name, __buf, __len, +- __glibc_objsize (__buf)); +- } +- return __confstr_alias (__name, __buf, __len); ++ return __glibc_fortify (confstr, __len, sizeof (char), ++ __glibc_objsize (__buf), ++ __name, __buf, __len); + } + + +@@ -279,15 +218,9 @@ extern int __REDIRECT_NTH (__getgroups_chk_warn, + __fortify_function int + __NTH (getgroups (int __size, __gid_t __list[])) + { +- if (__glibc_objsize (__list) != (size_t) -1) +- { +- if (!__builtin_constant_p (__size) || __size < 0) +- return __getgroups_chk (__size, __list, __glibc_objsize (__list)); +- +- if (__size * sizeof (__gid_t) > __glibc_objsize (__list)) +- return __getgroups_chk_warn (__size, __list, __glibc_objsize (__list)); +- } +- return __getgroups_alias (__size, __list); ++ return __glibc_fortify (getgroups, __size, sizeof (__gid_t), ++ __glibc_objsize (__list), ++ __size, __list); + } + + +@@ -306,17 +239,9 @@ extern int __REDIRECT_NTH (__ttyname_r_chk_warn, + __fortify_function int + __NTH (ttyname_r (int __fd, char *__buf, size_t __buflen)) + { +- if (__glibc_objsize (__buf) != (size_t) -1) 
+- { +- if (!__builtin_constant_p (__buflen)) +- return __ttyname_r_chk (__fd, __buf, __buflen, +- __glibc_objsize (__buf)); +- +- if (__buflen > __glibc_objsize (__buf)) +- return __ttyname_r_chk_warn (__fd, __buf, __buflen, +- __glibc_objsize (__buf)); +- } +- return __ttyname_r_alias (__fd, __buf, __buflen); ++ return __glibc_fortify (ttyname_r, __buflen, sizeof (char), ++ __glibc_objsize (__buf), ++ __fd, __buf, __buflen); + } + + +@@ -334,16 +259,9 @@ extern int __REDIRECT (__getlogin_r_chk_warn, + __fortify_function int + getlogin_r (char *__buf, size_t __buflen) + { +- if (__glibc_objsize (__buf) != (size_t) -1) +- { +- if (!__builtin_constant_p (__buflen)) +- return __getlogin_r_chk (__buf, __buflen, __glibc_objsize (__buf)); +- +- if (__buflen > __glibc_objsize (__buf)) +- return __getlogin_r_chk_warn (__buf, __buflen, +- __glibc_objsize (__buf)); +- } +- return __getlogin_r_alias (__buf, __buflen); ++ return __glibc_fortify (getlogin_r, __buflen, sizeof (char), ++ __glibc_objsize (__buf), ++ __buf, __buflen); + } + #endif + +@@ -363,16 +281,9 @@ extern int __REDIRECT_NTH (__gethostname_chk_warn, + __fortify_function int + __NTH (gethostname (char *__buf, size_t __buflen)) + { +- if (__glibc_objsize (__buf) != (size_t) -1) +- { +- if (!__builtin_constant_p (__buflen)) +- return __gethostname_chk (__buf, __buflen, __glibc_objsize (__buf)); +- +- if (__buflen > __glibc_objsize (__buf)) +- return __gethostname_chk_warn (__buf, __buflen, +- __glibc_objsize (__buf)); +- } +- return __gethostname_alias (__buf, __buflen); ++ return __glibc_fortify (gethostname, __buflen, sizeof (char), ++ __glibc_objsize (__buf), ++ __buf, __buflen); + } + #endif + +@@ -394,15 +305,8 @@ extern int __REDIRECT_NTH (__getdomainname_chk_warn, + __fortify_function int + __NTH (getdomainname (char *__buf, size_t __buflen)) + { +- if (__glibc_objsize (__buf) != (size_t) -1) +- { +- if (!__builtin_constant_p (__buflen)) +- return __getdomainname_chk (__buf, __buflen, __glibc_objsize (__buf)); +- +- if (__buflen > __glibc_objsize (__buf)) +- return __getdomainname_chk_warn (__buf, __buflen, +- __glibc_objsize (__buf)); +- } +- return __getdomainname_alias (__buf, __buflen); ++ return __glibc_fortify (getdomainname, __buflen, sizeof (char), ++ __glibc_objsize (__buf), ++ __buf, __buflen); + } + #endif +diff --git a/socket/bits/socket2.h b/socket/bits/socket2.h +index 9c8ac69624ea4f78..b28cde55f3fd9c16 100644 +--- a/socket/bits/socket2.h ++++ b/socket/bits/socket2.h +@@ -33,17 +33,12 @@ extern ssize_t __REDIRECT (__recv_chk_warn, + __fortify_function ssize_t + recv (int __fd, void *__buf, size_t __n, int __flags) + { +- if (__glibc_objsize0 (__buf) != (size_t) -1) +- { +- if (!__builtin_constant_p (__n)) +- return __recv_chk (__fd, __buf, __n, __glibc_objsize0 (__buf), +- __flags); +- +- if (__n > __glibc_objsize0 (__buf)) +- return __recv_chk_warn (__fd, __buf, __n, __glibc_objsize0 (__buf), +- __flags); +- } +- return __recv_alias (__fd, __buf, __n, __flags); ++ size_t sz = __glibc_objsize0 (__buf); ++ if (__glibc_safe_or_unknown_len (__n, sizeof (char), sz)) ++ return __recv_alias (__fd, __buf, __n, __flags); ++ if (__glibc_unsafe_len (__n, sizeof (char), sz)) ++ return __recv_chk_warn (__fd, __buf, __n, sz, __flags); ++ return __recv_chk (__fd, __buf, __n, sz, __flags); + } + + extern ssize_t __recvfrom_chk (int __fd, void *__restrict __buf, size_t __n, +@@ -66,14 +61,11 @@ __fortify_function ssize_t + recvfrom (int __fd, void *__restrict __buf, size_t __n, int __flags, + __SOCKADDR_ARG __addr, socklen_t 
*__restrict __addr_len) + { +- if (__glibc_objsize0 (__buf) != (size_t) -1) +- { +- if (!__builtin_constant_p (__n)) +- return __recvfrom_chk (__fd, __buf, __n, __glibc_objsize0 (__buf), +- __flags, __addr, __addr_len); +- if (__n > __glibc_objsize0 (__buf)) +- return __recvfrom_chk_warn (__fd, __buf, __n, __glibc_objsize0 (__buf), +- __flags, __addr, __addr_len); +- } +- return __recvfrom_alias (__fd, __buf, __n, __flags, __addr, __addr_len); ++ size_t sz = __glibc_objsize0 (__buf); ++ if (__glibc_safe_or_unknown_len (__n, sizeof (char), sz)) ++ return __recvfrom_alias (__fd, __buf, __n, __flags, __addr, __addr_len); ++ if (__glibc_unsafe_len (__n, sizeof (char), sz)) ++ return __recvfrom_chk_warn (__fd, __buf, __n, sz, __flags, __addr, ++ __addr_len); ++ return __recvfrom_chk (__fd, __buf, __n, sz, __flags, __addr, __addr_len); + } +diff --git a/stdlib/bits/stdlib.h b/stdlib/bits/stdlib.h +index eae31b38f0475c2e..067115eeca123c6d 100644 +--- a/stdlib/bits/stdlib.h ++++ b/stdlib/bits/stdlib.h +@@ -36,17 +36,16 @@ extern char *__REDIRECT_NTH (__realpath_chk_warn, + __fortify_function __wur char * + __NTH (realpath (const char *__restrict __name, char *__restrict __resolved)) + { +- if (__glibc_objsize (__resolved) != (size_t) -1) +- { ++ size_t sz = __glibc_objsize (__resolved); ++ ++ if (sz == (size_t) -1) ++ return __realpath_alias (__name, __resolved); ++ + #if defined _LIBC_LIMITS_H_ && defined PATH_MAX +- if (__glibc_objsize (__resolved) < PATH_MAX) +- return __realpath_chk_warn (__name, __resolved, +- __glibc_objsize (__resolved)); ++ if (__glibc_unsafe_len (sz, sizeof (char), PATH_MAX)) ++ return __realpath_chk_warn (__name, __resolved, sz); + #endif +- return __realpath_chk (__name, __resolved, __glibc_objsize (__resolved)); +- } +- +- return __realpath_alias (__name, __resolved); ++ return __realpath_chk (__name, __resolved, sz); + } + + +@@ -65,16 +64,9 @@ extern int __REDIRECT_NTH (__ptsname_r_chk_warn, + __fortify_function int + __NTH (ptsname_r (int __fd, char *__buf, size_t __buflen)) + { +- if (__glibc_objsize (__buf) != (size_t) -1) +- { +- if (!__builtin_constant_p (__buflen)) +- return __ptsname_r_chk (__fd, __buf, __buflen, +- __glibc_objsize (__buf)); +- if (__buflen > __glibc_objsize (__buf)) +- return __ptsname_r_chk_warn (__fd, __buf, __buflen, +- __glibc_objsize (__buf)); +- } +- return __ptsname_r_alias (__fd, __buf, __buflen); ++ return __glibc_fortify (ptsname_r, __buflen, sizeof (char), ++ __glibc_objsize (__buf), ++ __fd, __buf, __buflen); + } + + +@@ -120,18 +112,9 @@ __fortify_function size_t + __NTH (mbstowcs (wchar_t *__restrict __dst, const char *__restrict __src, + size_t __len)) + { +- if (__glibc_objsize (__dst) != (size_t) -1) +- { +- if (!__builtin_constant_p (__len)) +- return __mbstowcs_chk (__dst, __src, __len, +- __glibc_objsize (__dst) / sizeof (wchar_t)); +- +- if (__len > __glibc_objsize (__dst) / sizeof (wchar_t)) +- return __mbstowcs_chk_warn (__dst, __src, __len, +- (__glibc_objsize (__dst) +- / sizeof (wchar_t))); +- } +- return __mbstowcs_alias (__dst, __src, __len); ++ return __glibc_fortify_n (mbstowcs, __len, sizeof (wchar_t), ++ __glibc_objsize (__dst), ++ __dst, __src, __len); + } + + +@@ -154,13 +137,7 @@ __fortify_function size_t + __NTH (wcstombs (char *__restrict __dst, const wchar_t *__restrict __src, + size_t __len)) + { +- if (__glibc_objsize (__dst) != (size_t) -1) +- { +- if (!__builtin_constant_p (__len)) +- return __wcstombs_chk (__dst, __src, __len, __glibc_objsize (__dst)); +- if (__len > __glibc_objsize (__dst)) +- 
return __wcstombs_chk_warn (__dst, __src, __len, +- __glibc_objsize (__dst)); +- } +- return __wcstombs_alias (__dst, __src, __len); ++ return __glibc_fortify (wcstombs, __len, sizeof (char), ++ __glibc_objsize (__dst), ++ __dst, __src, __len); + } +diff --git a/wcsmbs/bits/wchar2.h b/wcsmbs/bits/wchar2.h +index ea2518dc726aae52..26012ef9366c0b88 100644 +--- a/wcsmbs/bits/wchar2.h ++++ b/wcsmbs/bits/wchar2.h +@@ -39,17 +39,9 @@ __fortify_function wchar_t * + __NTH (wmemcpy (wchar_t *__restrict __s1, const wchar_t *__restrict __s2, + size_t __n)) + { +- if (__glibc_objsize0 (__s1) != (size_t) -1) +- { +- if (!__builtin_constant_p (__n)) +- return __wmemcpy_chk (__s1, __s2, __n, +- __glibc_objsize0 (__s1) / sizeof (wchar_t)); +- +- if (__n > __glibc_objsize0 (__s1) / sizeof (wchar_t)) +- return __wmemcpy_chk_warn (__s1, __s2, __n, +- __glibc_objsize0 (__s1) / sizeof (wchar_t)); +- } +- return __wmemcpy_alias (__s1, __s2, __n); ++ return __glibc_fortify_n (wmemcpy, __n, sizeof (wchar_t), ++ __glibc_objsize0 (__s1), ++ __s1, __s2, __n); + } + + +@@ -67,18 +59,9 @@ extern wchar_t *__REDIRECT_NTH (__wmemmove_chk_warn, + __fortify_function wchar_t * + __NTH (wmemmove (wchar_t *__s1, const wchar_t *__s2, size_t __n)) + { +- if (__glibc_objsize0 (__s1) != (size_t) -1) +- { +- if (!__builtin_constant_p (__n)) +- return __wmemmove_chk (__s1, __s2, __n, +- __glibc_objsize0 (__s1) / sizeof (wchar_t)); +- +- if (__n > __glibc_objsize0 (__s1) / sizeof (wchar_t)) +- return __wmemmove_chk_warn (__s1, __s2, __n, +- (__glibc_objsize0 (__s1) +- / sizeof (wchar_t))); +- } +- return __wmemmove_alias (__s1, __s2, __n); ++ return __glibc_fortify_n (wmemmove, __n, sizeof (wchar_t), ++ __glibc_objsize0 (__s1), ++ __s1, __s2, __n); + } + + +@@ -101,18 +84,9 @@ __fortify_function wchar_t * + __NTH (wmempcpy (wchar_t *__restrict __s1, const wchar_t *__restrict __s2, + size_t __n)) + { +- if (__glibc_objsize0 (__s1) != (size_t) -1) +- { +- if (!__builtin_constant_p (__n)) +- return __wmempcpy_chk (__s1, __s2, __n, +- __glibc_objsize0 (__s1) / sizeof (wchar_t)); +- +- if (__n > __glibc_objsize0 (__s1) / sizeof (wchar_t)) +- return __wmempcpy_chk_warn (__s1, __s2, __n, +- (__glibc_objsize0 (__s1) +- / sizeof (wchar_t))); +- } +- return __wmempcpy_alias (__s1, __s2, __n); ++ return __glibc_fortify_n (wmempcpy, __n, sizeof (wchar_t), ++ __glibc_objsize0 (__s1), ++ __s1, __s2, __n); + } + #endif + +@@ -130,17 +104,9 @@ extern wchar_t *__REDIRECT_NTH (__wmemset_chk_warn, + __fortify_function wchar_t * + __NTH (wmemset (wchar_t *__s, wchar_t __c, size_t __n)) + { +- if (__glibc_objsize0 (__s) != (size_t) -1) +- { +- if (!__builtin_constant_p (__n)) +- return __wmemset_chk (__s, __c, __n, +- __glibc_objsize0 (__s) / sizeof (wchar_t)); +- +- if (__n > __glibc_objsize0 (__s) / sizeof (wchar_t)) +- return __wmemset_chk_warn (__s, __c, __n, +- __glibc_objsize0 (__s) / sizeof (wchar_t)); +- } +- return __wmemset_alias (__s, __c, __n); ++ return __glibc_fortify_n (wmemset, __n, sizeof (wchar_t), ++ __glibc_objsize0 (__s), ++ __s, __c, __n); + } + + +@@ -154,9 +120,9 @@ extern wchar_t *__REDIRECT_NTH (__wcscpy_alias, + __fortify_function wchar_t * + __NTH (wcscpy (wchar_t *__restrict __dest, const wchar_t *__restrict __src)) + { +- if (__glibc_objsize (__dest) != (size_t) -1) +- return __wcscpy_chk (__dest, __src, +- __glibc_objsize (__dest) / sizeof (wchar_t)); ++ size_t sz = __glibc_objsize (__dest); ++ if (sz != (size_t) -1) ++ return __wcscpy_chk (__dest, __src, sz / sizeof (wchar_t)); + return __wcscpy_alias (__dest, __src); + } 
+ +@@ -171,9 +137,9 @@ extern wchar_t *__REDIRECT_NTH (__wcpcpy_alias, + __fortify_function wchar_t * + __NTH (wcpcpy (wchar_t *__restrict __dest, const wchar_t *__restrict __src)) + { +- if (__glibc_objsize (__dest) != (size_t) -1) +- return __wcpcpy_chk (__dest, __src, +- __glibc_objsize (__dest) / sizeof (wchar_t)); ++ size_t sz = __glibc_objsize (__dest); ++ if (sz != (size_t) -1) ++ return __wcpcpy_chk (__dest, __src, sz / sizeof (wchar_t)); + return __wcpcpy_alias (__dest, __src); + } + +@@ -196,17 +162,9 @@ __fortify_function wchar_t * + __NTH (wcsncpy (wchar_t *__restrict __dest, const wchar_t *__restrict __src, + size_t __n)) + { +- if (__glibc_objsize (__dest) != (size_t) -1) +- { +- if (!__builtin_constant_p (__n)) +- return __wcsncpy_chk (__dest, __src, __n, +- __glibc_objsize (__dest) / sizeof (wchar_t)); +- if (__n > __glibc_objsize (__dest) / sizeof (wchar_t)) +- return __wcsncpy_chk_warn (__dest, __src, __n, +- (__glibc_objsize (__dest) +- / sizeof (wchar_t))); +- } +- return __wcsncpy_alias (__dest, __src, __n); ++ return __glibc_fortify_n (wcsncpy, __n, sizeof (wchar_t), ++ __glibc_objsize (__dest), ++ __dest, __src, __n); + } + + +@@ -228,17 +186,9 @@ __fortify_function wchar_t * + __NTH (wcpncpy (wchar_t *__restrict __dest, const wchar_t *__restrict __src, + size_t __n)) + { +- if (__glibc_objsize (__dest) != (size_t) -1) +- { +- if (!__builtin_constant_p (__n)) +- return __wcpncpy_chk (__dest, __src, __n, +- __glibc_objsize (__dest) / sizeof (wchar_t)); +- if (__n > __glibc_objsize (__dest) / sizeof (wchar_t)) +- return __wcpncpy_chk_warn (__dest, __src, __n, +- (__glibc_objsize (__dest) +- / sizeof (wchar_t))); +- } +- return __wcpncpy_alias (__dest, __src, __n); ++ return __glibc_fortify_n (wcpncpy, __n, sizeof (wchar_t), ++ __glibc_objsize (__dest), ++ __dest, __src, __n); + } + + +@@ -252,9 +202,9 @@ extern wchar_t *__REDIRECT_NTH (__wcscat_alias, + __fortify_function wchar_t * + __NTH (wcscat (wchar_t *__restrict __dest, const wchar_t *__restrict __src)) + { +- if (__glibc_objsize (__dest) != (size_t) -1) +- return __wcscat_chk (__dest, __src, +- __glibc_objsize (__dest) / sizeof (wchar_t)); ++ size_t sz = __glibc_objsize (__dest); ++ if (sz != (size_t) -1) ++ return __wcscat_chk (__dest, __src, sz / sizeof (wchar_t)); + return __wcscat_alias (__dest, __src); + } + +@@ -271,9 +221,9 @@ __fortify_function wchar_t * + __NTH (wcsncat (wchar_t *__restrict __dest, const wchar_t *__restrict __src, + size_t __n)) + { +- if (__glibc_objsize (__dest) != (size_t) -1) +- return __wcsncat_chk (__dest, __src, __n, +- __glibc_objsize (__dest) / sizeof (wchar_t)); ++ size_t sz = __glibc_objsize (__dest); ++ if (sz != (size_t) -1) ++ return __wcsncat_chk (__dest, __src, __n, sz / sizeof (wchar_t)); + return __wcsncat_alias (__dest, __src, __n); + } + +@@ -293,10 +243,10 @@ __fortify_function int + __NTH (swprintf (wchar_t *__restrict __s, size_t __n, + const wchar_t *__restrict __fmt, ...)) + { +- if (__glibc_objsize (__s) != (size_t) -1 || __USE_FORTIFY_LEVEL > 1) ++ size_t sz = __glibc_objsize (__s); ++ if (sz != (size_t) -1 || __USE_FORTIFY_LEVEL > 1) + return __swprintf_chk (__s, __n, __USE_FORTIFY_LEVEL - 1, +- __glibc_objsize (__s) / sizeof (wchar_t), +- __fmt, __va_arg_pack ()); ++ sz / sizeof (wchar_t), __fmt, __va_arg_pack ()); + return __swprintf_alias (__s, __n, __fmt, __va_arg_pack ()); + } + #elif !defined __cplusplus +@@ -323,10 +273,10 @@ __fortify_function int + __NTH (vswprintf (wchar_t *__restrict __s, size_t __n, + const wchar_t *__restrict __fmt, 
__gnuc_va_list __ap)) + { +- if (__glibc_objsize (__s) != (size_t) -1 || __USE_FORTIFY_LEVEL > 1) ++ size_t sz = __glibc_objsize (__s); ++ if (sz != (size_t) -1 || __USE_FORTIFY_LEVEL > 1) + return __vswprintf_chk (__s, __n, __USE_FORTIFY_LEVEL - 1, +- __glibc_objsize (__s) / sizeof (wchar_t), __fmt, +- __ap); ++ sz / sizeof (wchar_t), __fmt, __ap); + return __vswprintf_alias (__s, __n, __fmt, __ap); + } + +@@ -392,18 +342,12 @@ extern wchar_t *__REDIRECT (__fgetws_chk_warn, + __fortify_function __wur wchar_t * + fgetws (wchar_t *__restrict __s, int __n, __FILE *__restrict __stream) + { +- if (__glibc_objsize (__s) != (size_t) -1) +- { +- if (!__builtin_constant_p (__n) || __n <= 0) +- return __fgetws_chk (__s, __glibc_objsize (__s) / sizeof (wchar_t), +- __n, __stream); +- +- if ((size_t) __n > __glibc_objsize (__s) / sizeof (wchar_t)) +- return __fgetws_chk_warn (__s, +- __glibc_objsize (__s) / sizeof (wchar_t), +- __n, __stream); +- } +- return __fgetws_alias (__s, __n, __stream); ++ size_t sz = __glibc_objsize (__s); ++ if (__glibc_safe_or_unknown_len (__n, sizeof (wchar_t), sz)) ++ return __fgetws_alias (__s, __n, __stream); ++ if (__glibc_unsafe_len (__n, sizeof (wchar_t), sz)) ++ return __fgetws_chk_warn (__s, sz / sizeof (wchar_t), __n, __stream); ++ return __fgetws_chk (__s, sz / sizeof (wchar_t), __n, __stream); + } + + #ifdef __USE_GNU +@@ -424,20 +368,13 @@ extern wchar_t *__REDIRECT (__fgetws_unlocked_chk_warn, + __fortify_function __wur wchar_t * + fgetws_unlocked (wchar_t *__restrict __s, int __n, __FILE *__restrict __stream) + { +- if (__glibc_objsize (__s) != (size_t) -1) +- { +- if (!__builtin_constant_p (__n) || __n <= 0) +- return __fgetws_unlocked_chk (__s, +- __glibc_objsize (__s) / sizeof (wchar_t), +- __n, __stream); +- +- if ((size_t) __n > __glibc_objsize (__s) / sizeof (wchar_t)) +- return __fgetws_unlocked_chk_warn (__s, +- (__glibc_objsize (__s) +- / sizeof (wchar_t)), +- __n, __stream); +- } +- return __fgetws_unlocked_alias (__s, __n, __stream); ++ size_t sz = __glibc_objsize (__s); ++ if (__glibc_safe_or_unknown_len (__n, sizeof (wchar_t), sz)) ++ return __fgetws_unlocked_alias (__s, __n, __stream); ++ if (__glibc_unsafe_len (__n, sizeof (wchar_t), sz)) ++ return __fgetws_unlocked_chk_warn (__s, sz / sizeof (wchar_t), __n, ++ __stream); ++ return __fgetws_unlocked_chk (__s, sz / sizeof (wchar_t), __n, __stream); + } + #endif + +@@ -488,18 +425,9 @@ __fortify_function size_t + __NTH (mbsrtowcs (wchar_t *__restrict __dst, const char **__restrict __src, + size_t __len, mbstate_t *__restrict __ps)) + { +- if (__glibc_objsize (__dst) != (size_t) -1) +- { +- if (!__builtin_constant_p (__len)) +- return __mbsrtowcs_chk (__dst, __src, __len, __ps, +- __glibc_objsize (__dst) / sizeof (wchar_t)); +- +- if (__len > __glibc_objsize (__dst) / sizeof (wchar_t)) +- return __mbsrtowcs_chk_warn (__dst, __src, __len, __ps, +- (__glibc_objsize (__dst) +- / sizeof (wchar_t))); +- } +- return __mbsrtowcs_alias (__dst, __src, __len, __ps); ++ return __glibc_fortify_n (mbsrtowcs, __len, sizeof (wchar_t), ++ __glibc_objsize (__dst), ++ __dst, __src, __len, __ps); + } + + +@@ -523,17 +451,9 @@ __fortify_function size_t + __NTH (wcsrtombs (char *__restrict __dst, const wchar_t **__restrict __src, + size_t __len, mbstate_t *__restrict __ps)) + { +- if (__glibc_objsize (__dst) != (size_t) -1) +- { +- if (!__builtin_constant_p (__len)) +- return __wcsrtombs_chk (__dst, __src, __len, __ps, +- __glibc_objsize (__dst)); +- +- if (__len > __glibc_objsize (__dst)) +- return 
__wcsrtombs_chk_warn (__dst, __src, __len, __ps, +- __glibc_objsize (__dst)); +- } +- return __wcsrtombs_alias (__dst, __src, __len, __ps); ++ return __glibc_fortify (wcsrtombs, __len, sizeof (char), ++ __glibc_objsize (__dst), ++ __dst, __src, __len, __ps); + } + + +@@ -559,18 +479,9 @@ __fortify_function size_t + __NTH (mbsnrtowcs (wchar_t *__restrict __dst, const char **__restrict __src, + size_t __nmc, size_t __len, mbstate_t *__restrict __ps)) + { +- if (__glibc_objsize (__dst) != (size_t) -1) +- { +- if (!__builtin_constant_p (__len)) +- return __mbsnrtowcs_chk (__dst, __src, __nmc, __len, __ps, +- __glibc_objsize (__dst) / sizeof (wchar_t)); +- +- if (__len > __glibc_objsize (__dst) / sizeof (wchar_t)) +- return __mbsnrtowcs_chk_warn (__dst, __src, __nmc, __len, __ps, +- (__glibc_objsize (__dst) +- / sizeof (wchar_t))); +- } +- return __mbsnrtowcs_alias (__dst, __src, __nmc, __len, __ps); ++ return __glibc_fortify_n (mbsnrtowcs, __len, sizeof (wchar_t), ++ __glibc_objsize (__dst), ++ __dst, __src, __nmc, __len, __ps); + } + + +@@ -596,16 +507,8 @@ __fortify_function size_t + __NTH (wcsnrtombs (char *__restrict __dst, const wchar_t **__restrict __src, + size_t __nwc, size_t __len, mbstate_t *__restrict __ps)) + { +- if (__glibc_objsize (__dst) != (size_t) -1) +- { +- if (!__builtin_constant_p (__len)) +- return __wcsnrtombs_chk (__dst, __src, __nwc, __len, __ps, +- __glibc_objsize (__dst)); +- +- if (__len > __glibc_objsize (__dst)) +- return __wcsnrtombs_chk_warn (__dst, __src, __nwc, __len, __ps, +- __glibc_objsize (__dst)); +- } +- return __wcsnrtombs_alias (__dst, __src, __nwc, __len, __ps); ++ return __glibc_fortify (wcsnrtombs, __len, sizeof (char), ++ __glibc_objsize (__dst), ++ __dst, __src, __nwc, __len, __ps); + } + #endif diff --git a/SOURCES/glibc-upstream-2.34-127.patch b/SOURCES/glibc-upstream-2.34-127.patch new file mode 100644 index 0000000..488cc63 --- /dev/null +++ b/SOURCES/glibc-upstream-2.34-127.patch @@ -0,0 +1,358 @@ +commit 536910724d30a38eeecd14cdbe2c93c78ea67646 +Author: Siddhesh Poyarekar +Date: Wed Oct 20 18:13:05 2021 +0530 + + debug: Add tests for _FORTIFY_SOURCE=3 + + Add some testing coverage for _FORTIFY_SOURCE=3. 
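
To make concrete what this new coverage exercises, here is a minimal, hypothetical
standalone program (not part of the patch series) built around the same volatile
buf_size/malloc pattern that the reworked tst-chk1.c below uses.  Against a glibc
with these patches and a compiler that provides __builtin_dynamic_object_size
(Clang 9+, or GCC 12+ once a later patch in this series enables it), the overflow
is caught only at _FORTIFY_SOURCE=3, because the heap object's size is not a
compile-time constant:

#include <stdlib.h>
#include <unistd.h>

/* Non-constant size, as in the reworked tst-chk1.c.  */
volatile size_t buf_size = 10;

int
main (void)
{
  char *buf = malloc (buf_size);
  if (buf == NULL)
    return 1;
  /* One byte past the allocation.  At _FORTIFY_SOURCE=2 the size is
     unknown (__builtin_object_size folds to -1) and plain read is
     called; at level 3, __builtin_dynamic_object_size tracks the
     malloc argument and __read_chk aborts at run time.  */
  read (0, buf, buf_size + 1);
  free (buf);
  return 0;
}

Compiling with, e.g., gcc -O2 -D_FORTIFY_SOURCE=3 and running it produces the
usual "buffer overflow detected" abort; at level 2 the overflow goes undetected.
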
+ + Signed-off-by: Siddhesh Poyarekar + Reviewed-by: Adhemerval Zanella + (cherry picked from commit ad6f2a010c2ce759936de4747f6e0d53991912f8) + +diff --git a/debug/Makefile b/debug/Makefile +index 6893111cbfaa0900..357f888246061e15 100644 +--- a/debug/Makefile ++++ b/debug/Makefile +@@ -120,6 +120,8 @@ CFLAGS-tst-chk3.c += -Wno-format -Wno-deprecated-declarations -Wno-error + CFLAGS-tst-chk4.cc += -Wno-format -Wno-deprecated-declarations -Wno-error + CFLAGS-tst-chk5.cc += -Wno-format -Wno-deprecated-declarations -Wno-error + CFLAGS-tst-chk6.cc += -Wno-format -Wno-deprecated-declarations -Wno-error ++CFLAGS-tst-chk7.c += -Wno-format -Wno-deprecated-declarations -Wno-error ++CFLAGS-tst-chk8.cc += -Wno-format -Wno-deprecated-declarations -Wno-error + CFLAGS-tst-lfschk1.c += -Wno-format -Wno-deprecated-declarations -Wno-error + CFLAGS-tst-lfschk2.c += -Wno-format -Wno-deprecated-declarations -Wno-error + CFLAGS-tst-lfschk3.c += -Wno-format -Wno-deprecated-declarations -Wno-error +@@ -129,6 +131,7 @@ CFLAGS-tst-lfschk6.cc += -Wno-format -Wno-deprecated-declarations -Wno-error + LDLIBS-tst-chk4 = -lstdc++ + LDLIBS-tst-chk5 = -lstdc++ + LDLIBS-tst-chk6 = -lstdc++ ++LDLIBS-tst-chk8 = -lstdc++ + LDLIBS-tst-lfschk4 = -lstdc++ + LDLIBS-tst-lfschk5 = -lstdc++ + LDLIBS-tst-lfschk6 = -lstdc++ +@@ -150,16 +153,16 @@ CFLAGS-tst-ssp-1.c += -fstack-protector-all + + tests = backtrace-tst tst-longjmp_chk tst-chk1 tst-chk2 tst-chk3 \ + tst-lfschk1 tst-lfschk2 tst-lfschk3 test-strcpy_chk test-stpcpy_chk \ +- tst-chk4 tst-chk5 tst-chk6 tst-lfschk4 tst-lfschk5 tst-lfschk6 \ +- tst-longjmp_chk2 tst-backtrace2 tst-backtrace3 tst-backtrace4 \ +- tst-backtrace5 tst-backtrace6 ++ tst-chk4 tst-chk5 tst-chk6 tst-chk7 tst-chk8 tst-lfschk4 tst-lfschk5 \ ++ tst-lfschk6 tst-longjmp_chk2 tst-backtrace2 tst-backtrace3 \ ++ tst-backtrace4 tst-backtrace5 tst-backtrace6 + + ifeq ($(have-ssp),yes) + tests += tst-ssp-1 + endif + + ifeq (,$(CXX)) +-tests-unsupported = tst-chk4 tst-chk5 tst-chk6 \ ++tests-unsupported = tst-chk4 tst-chk5 tst-chk6 tst-chk8 \ + tst-lfschk4 tst-lfschk5 tst-lfschk6 + endif + +@@ -193,6 +196,8 @@ $(objpfx)tst-chk3.out: $(gen-locales) + $(objpfx)tst-chk4.out: $(gen-locales) + $(objpfx)tst-chk5.out: $(gen-locales) + $(objpfx)tst-chk6.out: $(gen-locales) ++$(objpfx)tst-chk7.out: $(gen-locales) ++$(objpfx)tst-chk8.out: $(gen-locales) + $(objpfx)tst-lfschk1.out: $(gen-locales) + $(objpfx)tst-lfschk2.out: $(gen-locales) + $(objpfx)tst-lfschk3.out: $(gen-locales) +diff --git a/debug/tst-chk1.c b/debug/tst-chk1.c +index 6c1d32cc62c3a964..68ac00d1808382b8 100644 +--- a/debug/tst-chk1.c ++++ b/debug/tst-chk1.c +@@ -83,8 +83,14 @@ handler (int sig) + _exit (127); + } + ++#if __USE_FORTIFY_LEVEL == 3 ++volatile size_t buf_size = 10; ++#else + char buf[10]; + wchar_t wbuf[10]; ++#define buf_size sizeof (buf) ++#endif ++ + volatile size_t l0; + volatile char *p; + volatile wchar_t *wp; +@@ -123,6 +129,10 @@ int num2 = 987654; + static int + do_test (void) + { ++#if __USE_FORTIFY_LEVEL == 3 ++ char *buf = (char *) malloc (buf_size); ++ wchar_t *wbuf = (wchar_t *) malloc (buf_size * sizeof (wchar_t)); ++#endif + set_fortify_handler (handler); + + struct A { char buf1[9]; char buf2[1]; } a; +@@ -947,93 +957,93 @@ do_test (void) + + rewind (stdin); + +- if (fgets (buf, sizeof (buf), stdin) != buf ++ if (fgets (buf, buf_size, stdin) != buf + || memcmp (buf, "abcdefgh\n", 10)) + FAIL (); +- if (fgets (buf, sizeof (buf), stdin) != buf || memcmp (buf, "ABCDEFGHI", 10)) ++ if (fgets (buf, buf_size, stdin) != buf || memcmp 
(buf, "ABCDEFGHI", 10)) + FAIL (); + + rewind (stdin); + +- if (fgets (buf, l0 + sizeof (buf), stdin) != buf ++ if (fgets (buf, l0 + buf_size, stdin) != buf + || memcmp (buf, "abcdefgh\n", 10)) + FAIL (); + + #if __USE_FORTIFY_LEVEL >= 1 + CHK_FAIL_START +- if (fgets (buf, sizeof (buf) + 1, stdin) != buf) ++ if (fgets (buf, buf_size + 1, stdin) != buf) + FAIL (); + CHK_FAIL_END + + CHK_FAIL_START +- if (fgets (buf, l0 + sizeof (buf) + 1, stdin) != buf) ++ if (fgets (buf, l0 + buf_size + 1, stdin) != buf) + FAIL (); + CHK_FAIL_END + #endif + + rewind (stdin); + +- if (fgets_unlocked (buf, sizeof (buf), stdin) != buf ++ if (fgets_unlocked (buf, buf_size, stdin) != buf + || memcmp (buf, "abcdefgh\n", 10)) + FAIL (); +- if (fgets_unlocked (buf, sizeof (buf), stdin) != buf ++ if (fgets_unlocked (buf, buf_size, stdin) != buf + || memcmp (buf, "ABCDEFGHI", 10)) + FAIL (); + + rewind (stdin); + +- if (fgets_unlocked (buf, l0 + sizeof (buf), stdin) != buf ++ if (fgets_unlocked (buf, l0 + buf_size, stdin) != buf + || memcmp (buf, "abcdefgh\n", 10)) + FAIL (); + + #if __USE_FORTIFY_LEVEL >= 1 + CHK_FAIL_START +- if (fgets_unlocked (buf, sizeof (buf) + 1, stdin) != buf) ++ if (fgets_unlocked (buf, buf_size + 1, stdin) != buf) + FAIL (); + CHK_FAIL_END + + CHK_FAIL_START +- if (fgets_unlocked (buf, l0 + sizeof (buf) + 1, stdin) != buf) ++ if (fgets_unlocked (buf, l0 + buf_size + 1, stdin) != buf) + FAIL (); + CHK_FAIL_END + #endif + + rewind (stdin); + +- if (fread (buf, 1, sizeof (buf), stdin) != sizeof (buf) ++ if (fread (buf, 1, buf_size, stdin) != buf_size + || memcmp (buf, "abcdefgh\nA", 10)) + FAIL (); +- if (fread (buf, sizeof (buf), 1, stdin) != 1 ++ if (fread (buf, buf_size, 1, stdin) != 1 + || memcmp (buf, "BCDEFGHI\na", 10)) + FAIL (); + + rewind (stdin); + +- if (fread (buf, l0 + 1, sizeof (buf), stdin) != sizeof (buf) ++ if (fread (buf, l0 + 1, buf_size, stdin) != buf_size + || memcmp (buf, "abcdefgh\nA", 10)) + FAIL (); +- if (fread (buf, sizeof (buf), l0 + 1, stdin) != 1 ++ if (fread (buf, buf_size, l0 + 1, stdin) != 1 + || memcmp (buf, "BCDEFGHI\na", 10)) + FAIL (); + + #if __USE_FORTIFY_LEVEL >= 1 + CHK_FAIL_START +- if (fread (buf, 1, sizeof (buf) + 1, stdin) != sizeof (buf) + 1) ++ if (fread (buf, 1, buf_size + 1, stdin) != buf_size + 1) + FAIL (); + CHK_FAIL_END + + CHK_FAIL_START +- if (fread (buf, sizeof (buf) + 1, l0 + 1, stdin) != 1) ++ if (fread (buf, buf_size + 1, l0 + 1, stdin) != 1) + FAIL (); + CHK_FAIL_END + #endif + + rewind (stdin); + +- if (fread_unlocked (buf, 1, sizeof (buf), stdin) != sizeof (buf) ++ if (fread_unlocked (buf, 1, buf_size, stdin) != buf_size + || memcmp (buf, "abcdefgh\nA", 10)) + FAIL (); +- if (fread_unlocked (buf, sizeof (buf), 1, stdin) != 1 ++ if (fread_unlocked (buf, buf_size, 1, stdin) != 1 + || memcmp (buf, "BCDEFGHI\na", 10)) + FAIL (); + +@@ -1048,100 +1058,100 @@ do_test (void) + + rewind (stdin); + +- if (fread_unlocked (buf, l0 + 1, sizeof (buf), stdin) != sizeof (buf) ++ if (fread_unlocked (buf, l0 + 1, buf_size, stdin) != buf_size + || memcmp (buf, "abcdefgh\nA", 10)) + FAIL (); +- if (fread_unlocked (buf, sizeof (buf), l0 + 1, stdin) != 1 ++ if (fread_unlocked (buf, buf_size, l0 + 1, stdin) != 1 + || memcmp (buf, "BCDEFGHI\na", 10)) + FAIL (); + + #if __USE_FORTIFY_LEVEL >= 1 + CHK_FAIL_START +- if (fread_unlocked (buf, 1, sizeof (buf) + 1, stdin) != sizeof (buf) + 1) ++ if (fread_unlocked (buf, 1, buf_size + 1, stdin) != buf_size + 1) + FAIL (); + CHK_FAIL_END + + CHK_FAIL_START +- if (fread_unlocked (buf, sizeof (buf) + 1, l0 + 1, 
stdin) != 1) ++ if (fread_unlocked (buf, buf_size + 1, l0 + 1, stdin) != 1) + FAIL (); + CHK_FAIL_END + #endif + + lseek (fileno (stdin), 0, SEEK_SET); + +- if (read (fileno (stdin), buf, sizeof (buf) - 1) != sizeof (buf) - 1 ++ if (read (fileno (stdin), buf, buf_size - 1) != buf_size - 1 + || memcmp (buf, "abcdefgh\n", 9)) + FAIL (); +- if (read (fileno (stdin), buf, sizeof (buf) - 1) != sizeof (buf) - 1 ++ if (read (fileno (stdin), buf, buf_size - 1) != buf_size - 1 + || memcmp (buf, "ABCDEFGHI", 9)) + FAIL (); + + lseek (fileno (stdin), 0, SEEK_SET); + +- if (read (fileno (stdin), buf, l0 + sizeof (buf) - 1) != sizeof (buf) - 1 ++ if (read (fileno (stdin), buf, l0 + buf_size - 1) != buf_size - 1 + || memcmp (buf, "abcdefgh\n", 9)) + FAIL (); + + #if __USE_FORTIFY_LEVEL >= 1 + CHK_FAIL_START +- if (read (fileno (stdin), buf, sizeof (buf) + 1) != sizeof (buf) + 1) ++ if (read (fileno (stdin), buf, buf_size + 1) != buf_size + 1) + FAIL (); + CHK_FAIL_END + + CHK_FAIL_START +- if (read (fileno (stdin), buf, l0 + sizeof (buf) + 1) != sizeof (buf) + 1) ++ if (read (fileno (stdin), buf, l0 + buf_size + 1) != buf_size + 1) + FAIL (); + CHK_FAIL_END + #endif + +- if (pread (fileno (stdin), buf, sizeof (buf) - 1, sizeof (buf) - 2) +- != sizeof (buf) - 1 ++ if (pread (fileno (stdin), buf, buf_size - 1, buf_size - 2) ++ != buf_size - 1 + || memcmp (buf, "\nABCDEFGH", 9)) + FAIL (); +- if (pread (fileno (stdin), buf, sizeof (buf) - 1, 0) != sizeof (buf) - 1 ++ if (pread (fileno (stdin), buf, buf_size - 1, 0) != buf_size - 1 + || memcmp (buf, "abcdefgh\n", 9)) + FAIL (); +- if (pread (fileno (stdin), buf, l0 + sizeof (buf) - 1, sizeof (buf) - 3) +- != sizeof (buf) - 1 ++ if (pread (fileno (stdin), buf, l0 + buf_size - 1, buf_size - 3) ++ != buf_size - 1 + || memcmp (buf, "h\nABCDEFG", 9)) + FAIL (); + + #if __USE_FORTIFY_LEVEL >= 1 + CHK_FAIL_START +- if (pread (fileno (stdin), buf, sizeof (buf) + 1, 2 * sizeof (buf)) +- != sizeof (buf) + 1) ++ if (pread (fileno (stdin), buf, buf_size + 1, 2 * buf_size) ++ != buf_size + 1) + FAIL (); + CHK_FAIL_END + + CHK_FAIL_START +- if (pread (fileno (stdin), buf, l0 + sizeof (buf) + 1, 2 * sizeof (buf)) +- != sizeof (buf) + 1) ++ if (pread (fileno (stdin), buf, l0 + buf_size + 1, 2 * buf_size) ++ != buf_size + 1) + FAIL (); + CHK_FAIL_END + #endif + +- if (pread64 (fileno (stdin), buf, sizeof (buf) - 1, sizeof (buf) - 2) +- != sizeof (buf) - 1 ++ if (pread64 (fileno (stdin), buf, buf_size - 1, buf_size - 2) ++ != buf_size - 1 + || memcmp (buf, "\nABCDEFGH", 9)) + FAIL (); +- if (pread64 (fileno (stdin), buf, sizeof (buf) - 1, 0) != sizeof (buf) - 1 ++ if (pread64 (fileno (stdin), buf, buf_size - 1, 0) != buf_size - 1 + || memcmp (buf, "abcdefgh\n", 9)) + FAIL (); +- if (pread64 (fileno (stdin), buf, l0 + sizeof (buf) - 1, sizeof (buf) - 3) +- != sizeof (buf) - 1 ++ if (pread64 (fileno (stdin), buf, l0 + buf_size - 1, buf_size - 3) ++ != buf_size - 1 + || memcmp (buf, "h\nABCDEFG", 9)) + FAIL (); + + #if __USE_FORTIFY_LEVEL >= 1 + CHK_FAIL_START +- if (pread64 (fileno (stdin), buf, sizeof (buf) + 1, 2 * sizeof (buf)) +- != sizeof (buf) + 1) ++ if (pread64 (fileno (stdin), buf, buf_size + 1, 2 * buf_size) ++ != buf_size + 1) + FAIL (); + CHK_FAIL_END + + CHK_FAIL_START +- if (pread64 (fileno (stdin), buf, l0 + sizeof (buf) + 1, 2 * sizeof (buf)) +- != sizeof (buf) + 1) ++ if (pread64 (fileno (stdin), buf, l0 + buf_size + 1, 2 * buf_size) ++ != buf_size + 1) + FAIL (); + CHK_FAIL_END + #endif +@@ -1179,7 +1189,7 @@ do_test (void) + CHK_FAIL2_END + + CHK_FAIL2_START 
+- snprintf (buf, sizeof (buf), "%3$d\n", 1, 2, 3, 4); ++ snprintf (buf, buf_size, "%3$d\n", 1, 2, 3, 4); + CHK_FAIL2_END + + int sp[2]; +diff --git a/debug/tst-chk7.c b/debug/tst-chk7.c +new file mode 100644 +index 0000000000000000..2a7b32381268135c +--- /dev/null ++++ b/debug/tst-chk7.c +@@ -0,0 +1,2 @@ ++#define _FORTIFY_SOURCE 3 ++#include "tst-chk1.c" +diff --git a/debug/tst-chk8.cc b/debug/tst-chk8.cc +new file mode 100644 +index 0000000000000000..2a7b32381268135c +--- /dev/null ++++ b/debug/tst-chk8.cc +@@ -0,0 +1,2 @@ ++#define _FORTIFY_SOURCE 3 ++#include "tst-chk1.c" diff --git a/SOURCES/glibc-upstream-2.34-128.patch b/SOURCES/glibc-upstream-2.34-128.patch new file mode 100644 index 0000000..f08fa75 --- /dev/null +++ b/SOURCES/glibc-upstream-2.34-128.patch @@ -0,0 +1,24 @@ +commit 98ea9372cc8d60b12aa33e007fdd5f5fb042a651 +Author: Siddhesh Poyarekar +Date: Thu Dec 16 07:19:14 2021 +0530 + + __glibc_unsafe_len: Fix comment + + We know that the length is *unsafe*. + + Signed-off-by: Siddhesh Poyarekar + (cherry picked from commit ae23fa3e5fe24daf94fc7f8e5268bb8ceeda7477) + +diff --git a/misc/sys/cdefs.h b/misc/sys/cdefs.h +index 4825ff0351c1e5d4..3bb9f38916e30295 100644 +--- a/misc/sys/cdefs.h ++++ b/misc/sys/cdefs.h +@@ -168,7 +168,7 @@ + __s, __osz)) \ + && __glibc_safe_len_cond ((__SIZE_TYPE__) (__l), __s, __osz)) + +-/* Conversely, we know at compile time that the length is safe if the ++/* Conversely, we know at compile time that the length is unsafe if the + __L * __S <= __OBJSZ condition can be folded to a constant and if it is + false. */ + #define __glibc_unsafe_len(__l, __s, __osz) \ diff --git a/SOURCES/glibc-upstream-2.34-129.patch b/SOURCES/glibc-upstream-2.34-129.patch new file mode 100644 index 0000000..507a5a5 --- /dev/null +++ b/SOURCES/glibc-upstream-2.34-129.patch @@ -0,0 +1,91 @@ +commit 10f7bdebe570e42d1f7a43de4f90eda8ccb4f0da +Author: Siddhesh Poyarekar +Date: Fri Dec 17 18:35:44 2021 +0530 + + fortify: Fix spurious warning with realpath + + The length and object size arguments were swapped around for realpath. + Also add a smoke test so that any changes in this area get caught in + future. + + Signed-off-by: Siddhesh Poyarekar + Reviewed-by: Adhemerval Zanella + (cherry picked from commit 2bbd07c715275eb6c616988925738a0517180d57) + +diff --git a/debug/Makefile b/debug/Makefile +index 357f888246061e15..bc37e466eed490fa 100644 +--- a/debug/Makefile ++++ b/debug/Makefile +@@ -108,6 +108,7 @@ CFLAGS-tst-longjmp_chk2.c += -fexceptions -fasynchronous-unwind-tables + CPPFLAGS-tst-longjmp_chk2.c += -D_FORTIFY_SOURCE=1 + CFLAGS-tst-longjmp_chk3.c += -fexceptions -fasynchronous-unwind-tables + CPPFLAGS-tst-longjmp_chk3.c += -D_FORTIFY_SOURCE=1 ++CPPFLAGS-tst-realpath-chk.c += -D_FORTIFY_SOURCE=2 + + # We know these tests have problems with format strings, this is what + # we are testing. Disable that warning. 
They are also testing +@@ -155,7 +156,7 @@ tests = backtrace-tst tst-longjmp_chk tst-chk1 tst-chk2 tst-chk3 \ + tst-lfschk1 tst-lfschk2 tst-lfschk3 test-strcpy_chk test-stpcpy_chk \ + tst-chk4 tst-chk5 tst-chk6 tst-chk7 tst-chk8 tst-lfschk4 tst-lfschk5 \ + tst-lfschk6 tst-longjmp_chk2 tst-backtrace2 tst-backtrace3 \ +- tst-backtrace4 tst-backtrace5 tst-backtrace6 ++ tst-backtrace4 tst-backtrace5 tst-backtrace6 tst-realpath-chk + + ifeq ($(have-ssp),yes) + tests += tst-ssp-1 +diff --git a/debug/tst-realpath-chk.c b/debug/tst-realpath-chk.c +new file mode 100644 +index 0000000000000000..a8fcb327c43fb34d +--- /dev/null ++++ b/debug/tst-realpath-chk.c +@@ -0,0 +1,37 @@ ++/* Smoke test to verify that realpath does not cause spurious warnings. ++ Copyright The GNU Toolchain Authors. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++#include ++ ++#include ++#include ++ ++static int ++do_test (void) ++{ ++#ifdef PATH_MAX ++ char buf[PATH_MAX + 1]; ++ char *res = realpath (".", buf); ++ TEST_VERIFY (res == buf); ++#endif ++ ++ return 0; ++} ++ ++#include +diff --git a/stdlib/bits/stdlib.h b/stdlib/bits/stdlib.h +index 067115eeca123c6d..ccacbdf76a08225a 100644 +--- a/stdlib/bits/stdlib.h ++++ b/stdlib/bits/stdlib.h +@@ -42,7 +42,7 @@ __NTH (realpath (const char *__restrict __name, char *__restrict __resolved)) + return __realpath_alias (__name, __resolved); + + #if defined _LIBC_LIMITS_H_ && defined PATH_MAX +- if (__glibc_unsafe_len (sz, sizeof (char), PATH_MAX)) ++ if (__glibc_unsafe_len (PATH_MAX, sizeof (char), sz)) + return __realpath_chk_warn (__name, __resolved, sz); + #endif + return __realpath_chk (__name, __resolved, sz); diff --git a/SOURCES/glibc-upstream-2.34-130.patch b/SOURCES/glibc-upstream-2.34-130.patch new file mode 100644 index 0000000..e114c75 --- /dev/null +++ b/SOURCES/glibc-upstream-2.34-130.patch @@ -0,0 +1,42 @@ +commit d6a58bd81d07322ff5da8f419d8620ef037b6a36 +Author: Siddhesh Poyarekar +Date: Wed Jan 12 18:46:28 2022 +0530 + + Enable _FORTIFY_SOURCE=3 for gcc 12 and above + + gcc 12 now has support for the __builtin_dynamic_object_size builtin. + Adapt the macro checks to enable _FORTIFY_SOURCE=3 on gcc 12 and above. 
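+
+    Illustration (editorial addition, not part of the upstream commit):
+    a minimal sketch of what the dynamic builtin buys at
+    _FORTIFY_SOURCE=3.  dup_prefix is a hypothetical function; build
+    with gcc 12 or later at -O2 with -D_FORTIFY_SOURCE=3.
+
+        #include <string.h>
+        #include <stdlib.h>
+
+        char *
+        dup_prefix (const char *s, size_t n)
+        {
+          char *p = malloc (n);        /* size known only at run time */
+          if (p == NULL)
+            return NULL;
+          /* At level 3, __glibc_objsize (p) expands to
+             __builtin_dynamic_object_size (p, 1), which tracks the
+             malloc size, so the fortified memcpy aborts on this
+             deliberate one-byte overflow.  __builtin_object_size
+             would fold to (size_t) -1 here and skip the check.  */
+          memcpy (p, s, n + 1);
+          return p;
+        }
+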
+ + Signed-off-by: Siddhesh Poyarekar + Reviewed-by: Adhemerval Zanella + (cherry picked from commit 86bf0feb0e3ec8e37872f72499d6ae33406561d7) + +diff --git a/include/features.h b/include/features.h +index d974eabfafc24ffb..933499bcff8f1449 100644 +--- a/include/features.h ++++ b/include/features.h +@@ -412,7 +412,9 @@ + # warning _FORTIFY_SOURCE requires compiling with optimization (-O) + # elif !__GNUC_PREREQ (4, 1) + # warning _FORTIFY_SOURCE requires GCC 4.1 or later +-# elif _FORTIFY_SOURCE > 2 && __glibc_clang_prereq (9, 0) ++# elif _FORTIFY_SOURCE > 2 && (__glibc_clang_prereq (9, 0) \ ++ || __GNUC_PREREQ (12, 0)) ++ + # if _FORTIFY_SOURCE > 3 + # warning _FORTIFY_SOURCE > 3 is treated like 3 on this platform + # endif +diff --git a/misc/sys/cdefs.h b/misc/sys/cdefs.h +index 3bb9f38916e30295..515fb681a0547217 100644 +--- a/misc/sys/cdefs.h ++++ b/misc/sys/cdefs.h +@@ -142,7 +142,8 @@ + #define __bos0(ptr) __builtin_object_size (ptr, 0) + + /* Use __builtin_dynamic_object_size at _FORTIFY_SOURCE=3 when available. */ +-#if __USE_FORTIFY_LEVEL == 3 && __glibc_clang_prereq (9, 0) ++#if __USE_FORTIFY_LEVEL == 3 && (__glibc_clang_prereq (9, 0) \ ++ || __GNUC_PREREQ (12, 0)) + # define __glibc_objsize0(__o) __builtin_dynamic_object_size (__o, 0) + # define __glibc_objsize(__o) __builtin_dynamic_object_size (__o, 1) + #else diff --git a/SOURCES/glibc-upstream-2.34-131.patch b/SOURCES/glibc-upstream-2.34-131.patch new file mode 100644 index 0000000..d2b1af9 --- /dev/null +++ b/SOURCES/glibc-upstream-2.34-131.patch @@ -0,0 +1,293 @@ +commit f8c2f620f1929ad78cb0a247601bec972f140c51 +Author: Siddhesh Poyarekar +Date: Wed Jan 12 23:34:23 2022 +0530 + + debug: Autogenerate _FORTIFY_SOURCE tests + + Rename debug/tst-chk1.c to debug/tst-fortify.c and add make hackery to + autogenerate tests with different macros enabled to build and run the + same test with different configurations as well as different + fortification levels. + + The change also ends up expanding the -lfs tests to include + _FORTIFY_SOURCE=3. + + Signed-off-by: Siddhesh Poyarekar + Reviewed-by: Adhemerval Zanella + (cherry picked from commit db27f1251b008280a29d540b4f8ab2a38a0d80af) + +diff --git a/Makerules b/Makerules +index 596fa68376f45298..7fbe85719aacc230 100644 +--- a/Makerules ++++ b/Makerules +@@ -424,6 +424,12 @@ $(objpfx)%$o: $(objpfx)%.c $(before-compile); $$(compile-command.c) + endef + object-suffixes-left := $(all-object-suffixes) + include $(o-iterator) ++ ++define o-iterator-doit ++$(objpfx)%$o: $(objpfx)%.cc $(before-compile); $$(compile-command.cc) ++endef ++object-suffixes-left := $(all-object-suffixes) ++include $(o-iterator) + endif + + # Generate .dT files as we compile. +diff --git a/debug/Makefile b/debug/Makefile +index bc37e466eed490fa..acc1b8f6ad029c62 100644 +--- a/debug/Makefile ++++ b/debug/Makefile +@@ -1,4 +1,5 @@ +-# Copyright (C) 1998-2021 Free Software Foundation, Inc. ++# Copyright (C) 1998-2022 Free Software Foundation, Inc. ++# Copyright The GNU Toolchain Authors. + # This file is part of the GNU C Library. + + # The GNU C Library is free software; you can redistribute it and/or +@@ -110,32 +111,60 @@ CFLAGS-tst-longjmp_chk3.c += -fexceptions -fasynchronous-unwind-tables + CPPFLAGS-tst-longjmp_chk3.c += -D_FORTIFY_SOURCE=1 + CPPFLAGS-tst-realpath-chk.c += -D_FORTIFY_SOURCE=2 + ++# _FORTIFY_SOURCE tests. ++# Auto-generate tests for _FORTIFY_SOURCE for different levels, compilers and ++# preprocessor conditions based on tst-fortify.c. 
++# ++# To add a new test condition, define a cflags-$(cond) make variable to set ++# CFLAGS for the file. ++ ++tests-all-chk = tst-fortify ++tests-c-chk = ++tests-cc-chk = ++ ++CFLAGS-tst-fortify.c += -Wno-format -Wno-deprecated-declarations -Wno-error ++ ++# No additional flags for the default tests. ++define cflags-default ++endef ++ ++define cflags-lfs ++CFLAGS-tst-fortify-$(1)-lfs-$(2).$(1) += -D_FILE_OFFSET_BITS=64 ++endef ++ + # We know these tests have problems with format strings, this is what + # we are testing. Disable that warning. They are also testing + # deprecated functions (notably gets) so disable that warning as well. + # And they also generate warnings from warning attributes, which + # cannot be disabled via pragmas, so require -Wno-error to be used. +-CFLAGS-tst-chk1.c += -Wno-format -Wno-deprecated-declarations -Wno-error +-CFLAGS-tst-chk2.c += -Wno-format -Wno-deprecated-declarations -Wno-error +-CFLAGS-tst-chk3.c += -Wno-format -Wno-deprecated-declarations -Wno-error +-CFLAGS-tst-chk4.cc += -Wno-format -Wno-deprecated-declarations -Wno-error +-CFLAGS-tst-chk5.cc += -Wno-format -Wno-deprecated-declarations -Wno-error +-CFLAGS-tst-chk6.cc += -Wno-format -Wno-deprecated-declarations -Wno-error +-CFLAGS-tst-chk7.c += -Wno-format -Wno-deprecated-declarations -Wno-error +-CFLAGS-tst-chk8.cc += -Wno-format -Wno-deprecated-declarations -Wno-error +-CFLAGS-tst-lfschk1.c += -Wno-format -Wno-deprecated-declarations -Wno-error +-CFLAGS-tst-lfschk2.c += -Wno-format -Wno-deprecated-declarations -Wno-error +-CFLAGS-tst-lfschk3.c += -Wno-format -Wno-deprecated-declarations -Wno-error +-CFLAGS-tst-lfschk4.cc += -Wno-format -Wno-deprecated-declarations -Wno-error +-CFLAGS-tst-lfschk5.cc += -Wno-format -Wno-deprecated-declarations -Wno-error +-CFLAGS-tst-lfschk6.cc += -Wno-format -Wno-deprecated-declarations -Wno-error +-LDLIBS-tst-chk4 = -lstdc++ +-LDLIBS-tst-chk5 = -lstdc++ +-LDLIBS-tst-chk6 = -lstdc++ +-LDLIBS-tst-chk8 = -lstdc++ +-LDLIBS-tst-lfschk4 = -lstdc++ +-LDLIBS-tst-lfschk5 = -lstdc++ +-LDLIBS-tst-lfschk6 = -lstdc++ ++define gen-chk-test ++tests-$(1)-chk += tst-fortify-$(1)-$(2)-$(3) ++CFLAGS-tst-fortify-$(1)-$(2)-$(3).$(1) += -D_FORTIFY_SOURCE=$(3) -Wno-format \ ++ -Wno-deprecated-declarations \ ++ -Wno-error ++$(eval $(call cflags-$(2),$(1),$(3))) ++$(objpfx)tst-fortify-$(1)-$(2)-$(3).$(1): tst-fortify.c Makefile ++ ( echo "/* Autogenerated from Makefile. */"; \ ++ echo ""; \ ++ echo "#include \"tst-fortify.c\"" ) > $$@.tmp ++ mv $$@.tmp $$@ ++endef ++ ++chk-extensions = c cc ++chk-types = default lfs ++chk-levels = 1 2 3 ++ ++$(foreach e,$(chk-extensions), \ ++ $(foreach t,$(chk-types), \ ++ $(foreach l,$(chk-levels), \ ++ $(eval $(call gen-chk-test,$(e),$(t),$(l)))))) ++ ++tests-all-chk += $(tests-c-chk) $(tests-cc-chk) ++ ++define link-cc ++LDLIBS-$(1) = -lstdc++ ++endef ++$(foreach t,$(tests-cc-chk), $(eval $(call link-cc,$(t)))) + + # backtrace_symbols only works if we link with -rdynamic. backtrace + # requires unwind tables on most architectures. 
+@@ -152,19 +181,25 @@ LDFLAGS-tst-backtrace6 = -rdynamic + + CFLAGS-tst-ssp-1.c += -fstack-protector-all + +-tests = backtrace-tst tst-longjmp_chk tst-chk1 tst-chk2 tst-chk3 \ +- tst-lfschk1 tst-lfschk2 tst-lfschk3 test-strcpy_chk test-stpcpy_chk \ +- tst-chk4 tst-chk5 tst-chk6 tst-chk7 tst-chk8 tst-lfschk4 tst-lfschk5 \ +- tst-lfschk6 tst-longjmp_chk2 tst-backtrace2 tst-backtrace3 \ +- tst-backtrace4 tst-backtrace5 tst-backtrace6 tst-realpath-chk ++tests = backtrace-tst \ ++ tst-longjmp_chk \ ++ test-strcpy_chk \ ++ test-stpcpy_chk \ ++ tst-longjmp_chk2 \ ++ tst-backtrace2 \ ++ tst-backtrace3 \ ++ tst-backtrace4 \ ++ tst-backtrace5 \ ++ tst-backtrace6 \ ++ tst-realpath-chk \ ++ $(tests-all-chk) + + ifeq ($(have-ssp),yes) + tests += tst-ssp-1 + endif + + ifeq (,$(CXX)) +-tests-unsupported = tst-chk4 tst-chk5 tst-chk6 tst-chk8 \ +- tst-lfschk4 tst-lfschk5 tst-lfschk6 ++tests-unsupported = $(tests-cc-chk) + endif + + extra-libs = libSegFault libpcprofile +@@ -191,20 +226,10 @@ ifeq ($(run-built-tests),yes) + LOCALES := de_DE.UTF-8 + include ../gen-locales.mk + +-$(objpfx)tst-chk1.out: $(gen-locales) +-$(objpfx)tst-chk2.out: $(gen-locales) +-$(objpfx)tst-chk3.out: $(gen-locales) +-$(objpfx)tst-chk4.out: $(gen-locales) +-$(objpfx)tst-chk5.out: $(gen-locales) +-$(objpfx)tst-chk6.out: $(gen-locales) +-$(objpfx)tst-chk7.out: $(gen-locales) +-$(objpfx)tst-chk8.out: $(gen-locales) +-$(objpfx)tst-lfschk1.out: $(gen-locales) +-$(objpfx)tst-lfschk2.out: $(gen-locales) +-$(objpfx)tst-lfschk3.out: $(gen-locales) +-$(objpfx)tst-lfschk4.out: $(gen-locales) +-$(objpfx)tst-lfschk5.out: $(gen-locales) +-$(objpfx)tst-lfschk6.out: $(gen-locales) ++define chk-gen-locales ++$(objpfx)$(1).out: $(gen-locales) ++endef ++$(foreach t, $(tests-all-chk), $(eval $(call chk-gen-locales,$(t)))) + endif + + sLIBdir := $(shell echo $(slibdir) | sed 's,lib\(\|64\)$$,\\\\$$LIB,') +diff --git a/debug/tst-chk2.c b/debug/tst-chk2.c +deleted file mode 100644 +index be37ce2d22f0760a..0000000000000000 +--- a/debug/tst-chk2.c ++++ /dev/null +@@ -1,2 +0,0 @@ +-#define _FORTIFY_SOURCE 1 +-#include "tst-chk1.c" +diff --git a/debug/tst-chk3.c b/debug/tst-chk3.c +deleted file mode 100644 +index 38b8e4fb360ba722..0000000000000000 +--- a/debug/tst-chk3.c ++++ /dev/null +@@ -1,2 +0,0 @@ +-#define _FORTIFY_SOURCE 2 +-#include "tst-chk1.c" +diff --git a/debug/tst-chk4.cc b/debug/tst-chk4.cc +deleted file mode 100644 +index c82e6aac86038791..0000000000000000 +--- a/debug/tst-chk4.cc ++++ /dev/null +@@ -1 +0,0 @@ +-#include "tst-chk1.c" +diff --git a/debug/tst-chk5.cc b/debug/tst-chk5.cc +deleted file mode 100644 +index be37ce2d22f0760a..0000000000000000 +--- a/debug/tst-chk5.cc ++++ /dev/null +@@ -1,2 +0,0 @@ +-#define _FORTIFY_SOURCE 1 +-#include "tst-chk1.c" +diff --git a/debug/tst-chk6.cc b/debug/tst-chk6.cc +deleted file mode 100644 +index 38b8e4fb360ba722..0000000000000000 +--- a/debug/tst-chk6.cc ++++ /dev/null +@@ -1,2 +0,0 @@ +-#define _FORTIFY_SOURCE 2 +-#include "tst-chk1.c" +diff --git a/debug/tst-chk7.c b/debug/tst-chk7.c +deleted file mode 100644 +index 2a7b32381268135c..0000000000000000 +--- a/debug/tst-chk7.c ++++ /dev/null +@@ -1,2 +0,0 @@ +-#define _FORTIFY_SOURCE 3 +-#include "tst-chk1.c" +diff --git a/debug/tst-chk8.cc b/debug/tst-chk8.cc +deleted file mode 100644 +index 2a7b32381268135c..0000000000000000 +--- a/debug/tst-chk8.cc ++++ /dev/null +@@ -1,2 +0,0 @@ +-#define _FORTIFY_SOURCE 3 +-#include "tst-chk1.c" +diff --git a/debug/tst-chk1.c b/debug/tst-fortify.c +similarity index 100% +rename from debug/tst-chk1.c +rename 
to debug/tst-fortify.c +diff --git a/debug/tst-lfschk1.c b/debug/tst-lfschk1.c +deleted file mode 100644 +index f3e6d47d5e4484c3..0000000000000000 +--- a/debug/tst-lfschk1.c ++++ /dev/null +@@ -1,2 +0,0 @@ +-#define _FILE_OFFSET_BITS 64 +-#include "tst-chk1.c" +diff --git a/debug/tst-lfschk2.c b/debug/tst-lfschk2.c +deleted file mode 100644 +index 95d4db1d32d2eeb3..0000000000000000 +--- a/debug/tst-lfschk2.c ++++ /dev/null +@@ -1,2 +0,0 @@ +-#define _FILE_OFFSET_BITS 64 +-#include "tst-chk2.c" +diff --git a/debug/tst-lfschk3.c b/debug/tst-lfschk3.c +deleted file mode 100644 +index 50a1ae1258f1553d..0000000000000000 +--- a/debug/tst-lfschk3.c ++++ /dev/null +@@ -1,2 +0,0 @@ +-#define _FILE_OFFSET_BITS 64 +-#include "tst-chk3.c" +diff --git a/debug/tst-lfschk4.cc b/debug/tst-lfschk4.cc +deleted file mode 100644 +index f3e6d47d5e4484c3..0000000000000000 +--- a/debug/tst-lfschk4.cc ++++ /dev/null +@@ -1,2 +0,0 @@ +-#define _FILE_OFFSET_BITS 64 +-#include "tst-chk1.c" +diff --git a/debug/tst-lfschk5.cc b/debug/tst-lfschk5.cc +deleted file mode 100644 +index 95d4db1d32d2eeb3..0000000000000000 +--- a/debug/tst-lfschk5.cc ++++ /dev/null +@@ -1,2 +0,0 @@ +-#define _FILE_OFFSET_BITS 64 +-#include "tst-chk2.c" +diff --git a/debug/tst-lfschk6.cc b/debug/tst-lfschk6.cc +deleted file mode 100644 +index 50a1ae1258f1553d..0000000000000000 +--- a/debug/tst-lfschk6.cc ++++ /dev/null +@@ -1,2 +0,0 @@ +-#define _FILE_OFFSET_BITS 64 +-#include "tst-chk3.c" diff --git a/SOURCES/glibc-upstream-2.34-132.patch b/SOURCES/glibc-upstream-2.34-132.patch new file mode 100644 index 0000000..9da05ba --- /dev/null +++ b/SOURCES/glibc-upstream-2.34-132.patch @@ -0,0 +1,356 @@ +commit 224d8c1890b6c57c7e4e8ddbb792dd9552086704 +Author: Siddhesh Poyarekar +Date: Wed Jan 12 23:34:48 2022 +0530 + + debug: Synchronize feature guards in fortified functions [BZ #28746] + + Some functions (e.g. stpcpy, pread64, etc.) had moved to POSIX in the + main headers as they got incorporated into the standard, but their + fortified variants remained under __USE_GNU. As a result, these + functions did not get fortified when _GNU_SOURCE was not defined. + + Add test wrappers that check all functions tested in tst-chk0 at all + levels with _GNU_SOURCE undefined and then use the failures to (1) + exclude checks for _GNU_SOURCE functions in these tests and (2) Fix + feature macro guards in the fortified function headers so that they're + the same as the ones in the main headers. + + This fixes BZ #28746. + + Signed-off-by: Siddhesh Poyarekar + Reviewed-by: Adhemerval Zanella + (cherry picked from commit fcfc9086815bf0d277ad47a90ee3fda4c37acca8) + +diff --git a/debug/Makefile b/debug/Makefile +index acc1b8f6ad029c62..71248e0d457a5b12 100644 +--- a/debug/Makefile ++++ b/debug/Makefile +@@ -132,6 +132,12 @@ define cflags-lfs + CFLAGS-tst-fortify-$(1)-lfs-$(2).$(1) += -D_FILE_OFFSET_BITS=64 + endef + ++define cflags-nongnu ++CFLAGS-tst-fortify-$(1)-nongnu-$(2).$(1) += -D_LARGEFILE64_SOURCE=1 ++endef ++ ++src-chk-nongnu = \#undef _GNU_SOURCE ++ + # We know these tests have problems with format strings, this is what + # we are testing. Disable that warning. They are also testing + # deprecated functions (notably gets) so disable that warning as well. +@@ -145,13 +151,13 @@ CFLAGS-tst-fortify-$(1)-$(2)-$(3).$(1) += -D_FORTIFY_SOURCE=$(3) -Wno-format \ + $(eval $(call cflags-$(2),$(1),$(3))) + $(objpfx)tst-fortify-$(1)-$(2)-$(3).$(1): tst-fortify.c Makefile + ( echo "/* Autogenerated from Makefile. 
*/"; \ +- echo ""; \ ++ echo "$(src-chk-$(2))"; \ + echo "#include \"tst-fortify.c\"" ) > $$@.tmp + mv $$@.tmp $$@ + endef + + chk-extensions = c cc +-chk-types = default lfs ++chk-types = default lfs nongnu + chk-levels = 1 2 3 + + $(foreach e,$(chk-extensions), \ +diff --git a/debug/tst-fortify.c b/debug/tst-fortify.c +index 68ac00d1808382b8..8b5902423cf0ad88 100644 +--- a/debug/tst-fortify.c ++++ b/debug/tst-fortify.c +@@ -1,4 +1,5 @@ +-/* Copyright (C) 2004-2021 Free Software Foundation, Inc. ++/* Copyright (C) 2004-2022 Free Software Foundation, Inc. ++ Copyright The GNU Toolchain Authors. + This file is part of the GNU C Library. + Contributed by Jakub Jelinek , 2004. + +@@ -37,6 +38,17 @@ + #include + #include + ++#ifndef _GNU_SOURCE ++# define MEMPCPY memcpy ++# define WMEMPCPY wmemcpy ++# define MEMPCPY_RET(x) 0 ++# define WMEMPCPY_RET(x) 0 ++#else ++# define MEMPCPY mempcpy ++# define WMEMPCPY wmempcpy ++# define MEMPCPY_RET(x) __builtin_strlen (x) ++# define WMEMPCPY_RET(x) wcslen (x) ++#endif + + #define obstack_chunk_alloc malloc + #define obstack_chunk_free free +@@ -163,7 +175,7 @@ do_test (void) + if (memcmp (buf, "aabcdefghi", 10)) + FAIL (); + +- if (mempcpy (buf + 5, "abcde", 5) != buf + 10 ++ if (MEMPCPY (buf + 5, "abcde", 5) != buf + 5 + MEMPCPY_RET ("abcde") + || memcmp (buf, "aabcdabcde", 10)) + FAIL (); + +@@ -208,7 +220,7 @@ do_test (void) + if (memcmp (buf, "aabcdefghi", 10)) + FAIL (); + +- if (mempcpy (buf + 5, "abcde", l0 + 5) != buf + 10 ++ if (MEMPCPY (buf + 5, "abcde", l0 + 5) != buf + 5 + MEMPCPY_RET ("abcde") + || memcmp (buf, "aabcdabcde", 10)) + FAIL (); + +@@ -267,7 +279,8 @@ do_test (void) + if (memcmp (a.buf1, "aabcdefghi", 10)) + FAIL (); + +- if (mempcpy (a.buf1 + 5, "abcde", l0 + 5) != a.buf1 + 10 ++ if (MEMPCPY (a.buf1 + 5, "abcde", l0 + 5) ++ != a.buf1 + 5 + MEMPCPY_RET ("abcde") + || memcmp (a.buf1, "aabcdabcde", 10)) + FAIL (); + +@@ -348,6 +361,7 @@ do_test (void) + bcopy (buf + 1, buf + 2, l0 + 9); + CHK_FAIL_END + ++#ifdef _GNU_SOURCE + CHK_FAIL_START + p = (char *) mempcpy (buf + 6, "abcde", 5); + CHK_FAIL_END +@@ -355,6 +369,7 @@ do_test (void) + CHK_FAIL_START + p = (char *) mempcpy (buf + 6, "abcde", l0 + 5); + CHK_FAIL_END ++#endif + + CHK_FAIL_START + memset (buf + 9, 'j', 2); +@@ -465,6 +480,7 @@ do_test (void) + bcopy (a.buf1 + 1, a.buf1 + 2, l0 + 9); + CHK_FAIL_END + ++#ifdef _GNU_SOURCE + CHK_FAIL_START + p = (char *) mempcpy (a.buf1 + 6, "abcde", 5); + CHK_FAIL_END +@@ -472,6 +488,7 @@ do_test (void) + CHK_FAIL_START + p = (char *) mempcpy (a.buf1 + 6, "abcde", l0 + 5); + CHK_FAIL_END ++#endif + + CHK_FAIL_START + memset (a.buf1 + 9, 'j', 2); +@@ -551,7 +568,7 @@ do_test (void) + if (wmemcmp (wbuf, L"aabcdefghi", 10)) + FAIL (); + +- if (wmempcpy (wbuf + 5, L"abcde", 5) != wbuf + 10 ++ if (WMEMPCPY (wbuf + 5, L"abcde", 5) != wbuf + 5 + WMEMPCPY_RET (L"abcde") + || wmemcmp (wbuf, L"aabcdabcde", 10)) + FAIL (); + +@@ -584,7 +601,8 @@ do_test (void) + if (wmemcmp (wbuf, L"aabcdefghi", 10)) + FAIL (); + +- if (wmempcpy (wbuf + 5, L"abcde", l0 + 5) != wbuf + 10 ++ if (WMEMPCPY (wbuf + 5, L"abcde", l0 + 5) ++ != wbuf + 5 + WMEMPCPY_RET (L"abcde") + || wmemcmp (wbuf, L"aabcdabcde", 10)) + FAIL (); + +@@ -626,7 +644,8 @@ do_test (void) + if (wmemcmp (wa.buf1, L"aabcdefghi", 10)) + FAIL (); + +- if (wmempcpy (wa.buf1 + 5, L"abcde", l0 + 5) != wa.buf1 + 10 ++ if (WMEMPCPY (wa.buf1 + 5, L"abcde", l0 + 5) ++ != wa.buf1 + 5 + WMEMPCPY_RET (L"abcde") + || wmemcmp (wa.buf1, L"aabcdabcde", 10)) + FAIL (); + +@@ -695,6 +714,7 @@ do_test (void) + 
wmemmove (wbuf + 2, wbuf + 1, l0 + 9); + CHK_FAIL_END + ++#ifdef _GNU_SOURCE + CHK_FAIL_START + wp = wmempcpy (wbuf + 6, L"abcde", 5); + CHK_FAIL_END +@@ -702,6 +722,7 @@ do_test (void) + CHK_FAIL_START + wp = wmempcpy (wbuf + 6, L"abcde", l0 + 5); + CHK_FAIL_END ++#endif + + CHK_FAIL_START + wmemset (wbuf + 9, L'j', 2); +@@ -769,6 +790,7 @@ do_test (void) + wmemmove (wa.buf1 + 2, wa.buf1 + 1, l0 + 9); + CHK_FAIL_END + ++#ifdef _GNU_SOURCE + CHK_FAIL_START + wp = wmempcpy (wa.buf1 + 6, L"abcde", 5); + CHK_FAIL_END +@@ -776,6 +798,7 @@ do_test (void) + CHK_FAIL_START + wp = wmempcpy (wa.buf1 + 6, L"abcde", l0 + 5); + CHK_FAIL_END ++#endif + + CHK_FAIL_START + wmemset (wa.buf1 + 9, L'j', 2); +@@ -907,6 +930,7 @@ do_test (void) + if (fprintf (fp, buf2 + 4, str5) != 7) + FAIL (); + ++#ifdef _GNU_SOURCE + char *my_ptr = NULL; + strcpy (buf2 + 2, "%n%s%n"); + /* When the format string is writable and contains %n, +@@ -936,6 +960,7 @@ do_test (void) + if (obstack_printf (&obs, "%s%n%s%n", str4, &n1, str5, &n1) != 14) + FAIL (); + obstack_free (&obs, NULL); ++#endif + + if (freopen (temp_filename, "r", stdin) == NULL) + { +@@ -983,6 +1008,7 @@ do_test (void) + + rewind (stdin); + ++#ifdef _GNU_SOURCE + if (fgets_unlocked (buf, buf_size, stdin) != buf + || memcmp (buf, "abcdefgh\n", 10)) + FAIL (); +@@ -1009,6 +1035,7 @@ do_test (void) + #endif + + rewind (stdin); ++#endif + + if (fread (buf, 1, buf_size, stdin) != buf_size + || memcmp (buf, "abcdefgh\nA", 10)) +@@ -1579,7 +1606,10 @@ do_test (void) + ret = 1; + } + +- int fd = posix_openpt (O_RDWR); ++ int fd; ++ ++#ifdef _GNU_SOURCE ++ fd = posix_openpt (O_RDWR); + if (fd != -1) + { + char enough[1000]; +@@ -1595,6 +1625,7 @@ do_test (void) + #endif + close (fd); + } ++#endif + + #if PATH_MAX > 0 + confstr (_CS_GNU_LIBC_VERSION, largebuf, sizeof (largebuf)); +@@ -1712,8 +1743,9 @@ do_test (void) + poll (fds, l0 + 2, 0); + CHK_FAIL_END + #endif ++#ifdef _GNU_SOURCE + ppoll (fds, 1, NULL, NULL); +-#if __USE_FORTIFY_LEVEL >= 1 ++# if __USE_FORTIFY_LEVEL >= 1 + CHK_FAIL_START + ppoll (fds, 2, NULL, NULL); + CHK_FAIL_END +@@ -1721,6 +1753,7 @@ do_test (void) + CHK_FAIL_START + ppoll (fds, l0 + 2, NULL, NULL); + CHK_FAIL_END ++# endif + #endif + + return ret; +diff --git a/posix/bits/unistd.h b/posix/bits/unistd.h +index 697dcbbf7b4b26f6..1df7e5ceef3e1fd4 100644 +--- a/posix/bits/unistd.h ++++ b/posix/bits/unistd.h +@@ -40,7 +40,7 @@ read (int __fd, void *__buf, size_t __nbytes) + __fd, __buf, __nbytes); + } + +-#ifdef __USE_UNIX98 ++#if defined __USE_UNIX98 || defined __USE_XOPEN2K8 + extern ssize_t __pread_chk (int __fd, void *__buf, size_t __nbytes, + __off_t __offset, size_t __bufsize) + __wur __attr_access ((__write_only__, 2, 3)); +diff --git a/string/bits/string_fortified.h b/string/bits/string_fortified.h +index 5731274848260ad2..218006c9ba882d9c 100644 +--- a/string/bits/string_fortified.h ++++ b/string/bits/string_fortified.h +@@ -79,7 +79,7 @@ __NTH (strcpy (char *__restrict __dest, const char *__restrict __src)) + return __builtin___strcpy_chk (__dest, __src, __glibc_objsize (__dest)); + } + +-#ifdef __USE_GNU ++#ifdef __USE_XOPEN2K8 + __fortify_function char * + __NTH (stpcpy (char *__restrict __dest, const char *__restrict __src)) + { +@@ -96,14 +96,15 @@ __NTH (strncpy (char *__restrict __dest, const char *__restrict __src, + __glibc_objsize (__dest)); + } + +-#if __GNUC_PREREQ (4, 7) || __glibc_clang_prereq (2, 6) ++#ifdef __USE_XOPEN2K8 ++# if __GNUC_PREREQ (4, 7) || __glibc_clang_prereq (2, 6) + __fortify_function char * + __NTH 
(stpncpy (char *__dest, const char *__src, size_t __n)) + { + return __builtin___stpncpy_chk (__dest, __src, __n, + __glibc_objsize (__dest)); + } +-#else ++# else + extern char *__stpncpy_chk (char *__dest, const char *__src, size_t __n, + size_t __destlen) __THROW + __fortified_attr_access ((__write_only__, 1, 3)) +@@ -119,6 +120,7 @@ __NTH (stpncpy (char *__dest, const char *__src, size_t __n)) + return __stpncpy_chk (__dest, __src, __n, __bos (__dest)); + return __stpncpy_alias (__dest, __src, __n); + } ++# endif + #endif + + +diff --git a/support/xsignal.h b/support/xsignal.h +index 8ee1fa6b4dceeadf..692e0f2c4242d848 100644 +--- a/support/xsignal.h ++++ b/support/xsignal.h +@@ -28,7 +28,9 @@ __BEGIN_DECLS + terminate the process on error. */ + + void xraise (int sig); ++#ifdef _GNU_SOURCE + sighandler_t xsignal (int sig, sighandler_t handler); ++#endif + void xsigaction (int sig, const struct sigaction *newact, + struct sigaction *oldact); + +diff --git a/wcsmbs/bits/wchar2.h b/wcsmbs/bits/wchar2.h +index 26012ef9366c0b88..88c1fdfcd34292f4 100644 +--- a/wcsmbs/bits/wchar2.h ++++ b/wcsmbs/bits/wchar2.h +@@ -457,7 +457,7 @@ __NTH (wcsrtombs (char *__restrict __dst, const wchar_t **__restrict __src, + } + + +-#ifdef __USE_GNU ++#ifdef __USE_XOPEN2K8 + extern size_t __mbsnrtowcs_chk (wchar_t *__restrict __dst, + const char **__restrict __src, size_t __nmc, + size_t __len, mbstate_t *__restrict __ps, diff --git a/SOURCES/glibc-upstream-2.34-133.patch b/SOURCES/glibc-upstream-2.34-133.patch new file mode 100644 index 0000000..a0c4478 --- /dev/null +++ b/SOURCES/glibc-upstream-2.34-133.patch @@ -0,0 +1,178 @@ +commit 738ee53f0ce5e39b9b7a6777f5d3057afbaac498 +Author: John David Anglin +Date: Tue Mar 15 23:12:37 2022 +0000 + + hppa: Implement swapcontext in assembler (bug 28960) + + When swapcontext.c is compiled without -g, the following error occurs: + Error: CFI instruction used without previous .cfi_startproc + + Fix by converting swapcontext routine to assembler. + +diff --git a/sysdeps/unix/sysv/linux/hppa/swapcontext.S b/sysdeps/unix/sysv/linux/hppa/swapcontext.S +new file mode 100644 +index 0000000000000000..94b164dc6375563e +--- /dev/null ++++ b/sysdeps/unix/sysv/linux/hppa/swapcontext.S +@@ -0,0 +1,72 @@ ++/* Swap to new context. ++ Copyright (C) 2008-2022 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ . */ ++ ++#include ++#include "ucontext_i.h" ++ ++ .text ++ENTRY(__swapcontext) ++ ++ /* Copy rp to ret0 (r28). */ ++ copy %rp,%ret0 ++ ++ /* Create a frame. */ ++ ldo 64(%sp),%sp ++ .cfi_def_cfa_offset -64 ++ ++ /* Save the current machine context to oucp. */ ++ bl __getcontext,%rp ++ ++ /* Copy oucp to register ret1 (r29). __getcontext saves and ++ restores it on a normal return. It is restored from oR29 ++ on reactivation. */ ++ copy %r26,%ret1 ++ ++ /* Pop frame. 
*/ ++ ldo -64(%sp),%sp ++ .cfi_def_cfa_offset 0 ++ ++ /* Load return pointer from oR28. */ ++ ldw oR28(%ret1),%rp ++ ++ /* Return if error. */ ++ or,= %r0,%ret0,%r0 ++ bv,n %r0(%rp) ++ ++ /* Load sc_sar flag. */ ++ ldb oSAR(%ret1),%r20 ++ ++ /* Return if oucp context has been reactivated. */ ++ or,= %r0,%r20,%r0 ++ bv,n %r0(%rp) ++ ++ /* Mark sc_sar flag. */ ++ ldi 1,%r20 ++ stb %r20,oSAR(%ret1) ++ ++ /* Activate the machine context in ucp. */ ++ bl __setcontext,%rp ++ ldw oR25(%ret1),%r26 ++ ++ /* Load return pointer. */ ++ ldw oR28(%ret1),%rp ++ bv,n %r0(%rp) ++ ++PSEUDO_END(__swapcontext) ++ ++weak_alias (__swapcontext, swapcontext) +diff --git a/sysdeps/unix/sysv/linux/hppa/swapcontext.c b/sysdeps/unix/sysv/linux/hppa/swapcontext.c +deleted file mode 100644 +index 1664f68c7b9982e8..0000000000000000 +--- a/sysdeps/unix/sysv/linux/hppa/swapcontext.c ++++ /dev/null +@@ -1,83 +0,0 @@ +-/* Swap to new context. +- Copyright (C) 2008-2021 Free Software Foundation, Inc. +- This file is part of the GNU C Library. +- Contributed by Helge Deller , 2008. +- +- The GNU C Library is free software; you can redistribute it and/or +- modify it under the terms of the GNU Lesser General Public +- License as published by the Free Software Foundation; either +- version 2.1 of the License, or (at your option) any later version. +- +- The GNU C Library is distributed in the hope that it will be useful, +- but WITHOUT ANY WARRANTY; without even the implied warranty of +- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +- Lesser General Public License for more details. +- +- You should have received a copy of the GNU Lesser General Public +- License along with the GNU C Library. If not, see +- . */ +- +-#include +-#include "ucontext_i.h" +- +-extern int __getcontext (ucontext_t *ucp); +-extern int __setcontext (const ucontext_t *ucp); +- +-int +-__swapcontext (ucontext_t *oucp, const ucontext_t *ucp) +-{ +- /* Save rp for debugger. */ +- asm ("stw %rp,-20(%sp)"); +- asm (".cfi_offset 2, -20"); +- +- /* Copy rp to ret0 (r28). */ +- asm ("copy %rp,%ret0"); +- +- /* Create a frame. */ +- asm ("ldo 64(%sp),%sp"); +- asm (".cfi_def_cfa_offset -64"); +- +- /* Save the current machine context to oucp. */ +- asm ("bl __getcontext,%rp"); +- +- /* Copy oucp to register ret1 (r29). __getcontext saves and restores it +- on a normal return. It is restored from oR29 on reactivation. */ +- asm ("copy %r26,%ret1"); +- +- /* Pop frame. */ +- asm ("ldo -64(%sp),%sp"); +- asm (".cfi_def_cfa_offset 0"); +- +- /* Load return pointer from oR28. */ +- asm ("ldw %0(%%ret1),%%rp" : : "i" (oR28)); +- +- /* Return if error. */ +- asm ("or,= %r0,%ret0,%r0"); +- asm ("bv,n %r0(%rp)"); +- +- /* Load sc_sar flag. */ +- asm ("ldb %0(%%ret1),%%r20" : : "i" (oSAR)); +- +- /* Return if oucp context has been reactivated. */ +- asm ("or,= %r0,%r20,%r0"); +- asm ("bv,n %r0(%rp)"); +- +- /* Mark sc_sar flag. */ +- asm ("1: ldi 1,%r20"); +- asm ("stb %%r20,%0(%%ret1)" : : "i" (oSAR)); +- +- /* Activate the machine context in ucp. */ +- asm ("bl __setcontext,%rp"); +- asm ("ldw %0(%%ret1),%%r26" : : "i" (oR25)); +- +- /* Load return pointer. */ +- asm ("ldw %0(%%ret1),%%rp" : : "i" (oR28)); +- +- /* A successful call to setcontext does not return. */ +- asm ("bv,n %r0(%rp)"); +- +- /* Make gcc happy. 
*/ +- return 0; +-} +- +-weak_alias (__swapcontext, swapcontext) diff --git a/SOURCES/glibc-upstream-2.34-134.patch b/SOURCES/glibc-upstream-2.34-134.patch new file mode 100644 index 0000000..79d45ee --- /dev/null +++ b/SOURCES/glibc-upstream-2.34-134.patch @@ -0,0 +1,20 @@ +commit d53b9cc391c72a1011ea8fe7a9f70dc5060a0db2 +Author: John David Anglin +Date: Tue Mar 15 23:04:39 2022 +0000 + + hppa: Use END instead of PSEUDO_END in swapcontext.S + + (cherry picked from commit 7a5c440102d4ec7fafd9bbd98eca9bd90ecaaafd) + +diff --git a/sysdeps/unix/sysv/linux/hppa/swapcontext.S b/sysdeps/unix/sysv/linux/hppa/swapcontext.S +index 94b164dc6375563e..fbc22586d1195a0d 100644 +--- a/sysdeps/unix/sysv/linux/hppa/swapcontext.S ++++ b/sysdeps/unix/sysv/linux/hppa/swapcontext.S +@@ -67,6 +67,6 @@ ENTRY(__swapcontext) + ldw oR28(%ret1),%rp + bv,n %r0(%rp) + +-PSEUDO_END(__swapcontext) ++END(__swapcontext) + + weak_alias (__swapcontext, swapcontext) diff --git a/SOURCES/glibc-upstream-2.34-135.patch b/SOURCES/glibc-upstream-2.34-135.patch new file mode 100644 index 0000000..e5b3840 --- /dev/null +++ b/SOURCES/glibc-upstream-2.34-135.patch @@ -0,0 +1,35 @@ +commit 4b5b8a1cdf39bed02b8b973717796eccde455ed6 +Author: Fangrui Song +Date: Mon Sep 27 10:12:50 2021 -0700 + + powerpc: Delete unneeded ELF_MACHINE_BEFORE_RTLD_RELOC + + Reviewed-by: Raphael M Zinsly + (cherry picked from commit 8e2557a2b85b2eb0ed50a9016a4ffc6b859b97e6) + +diff --git a/sysdeps/powerpc/powerpc32/dl-machine.h b/sysdeps/powerpc/powerpc32/dl-machine.h +index ced3a7b659cfcff1..b93cf486b6cda5fd 100644 +--- a/sysdeps/powerpc/powerpc32/dl-machine.h ++++ b/sysdeps/powerpc/powerpc32/dl-machine.h +@@ -109,8 +109,6 @@ elf_machine_load_address (void) + return runtime_dynamic - elf_machine_dynamic (); + } + +-#define ELF_MACHINE_BEFORE_RTLD_RELOC(dynamic_info) /* nothing */ +- + /* The PLT uses Elf32_Rela relocs. */ + #define elf_machine_relplt elf_machine_rela + +diff --git a/sysdeps/powerpc/powerpc64/dl-machine.h b/sysdeps/powerpc/powerpc64/dl-machine.h +index b90f407119efd431..b3f3352bcf5a52b0 100644 +--- a/sysdeps/powerpc/powerpc64/dl-machine.h ++++ b/sysdeps/powerpc/powerpc64/dl-machine.h +@@ -116,8 +116,6 @@ elf_machine_dynamic (void) + return runtime_dynamic - elf_machine_load_address() ; + } + +-#define ELF_MACHINE_BEFORE_RTLD_RELOC(dynamic_info) /* nothing */ +- + /* The PLT uses Elf64_Rela relocs. */ + #define elf_machine_relplt elf_machine_rela + diff --git a/SOURCES/glibc-upstream-2.34-136.patch b/SOURCES/glibc-upstream-2.34-136.patch new file mode 100644 index 0000000..0906c1c --- /dev/null +++ b/SOURCES/glibc-upstream-2.34-136.patch @@ -0,0 +1,2020 @@ +commit b19de59d620b3a9e6adf937f322f4281b67fc712 +Author: Fangrui Song +Date: Thu Oct 7 11:55:02 2021 -0700 + + elf: Avoid nested functions in the loader [BZ #27220] + + dynamic-link.h is included more than once in some elf/ files (rtld.c, + dl-conflict.c, dl-reloc.c, dl-reloc-static-pie.c) and uses GCC nested + functions. This harms readability and the nested functions usage + is the biggest obstacle prevents Clang build (Clang doesn't support GCC + nested functions). + + The key idea for unnesting is to add extra parameters (struct link_map + *and struct r_scope_elm *[]) to RESOLVE_MAP, + ELF_MACHINE_BEFORE_RTLD_RELOC, ELF_DYNAMIC_RELOCATE, elf_machine_rel[a], + elf_machine_lazy_rel, and elf_machine_runtime_setup. (This is inspired + by Stan Shebs' ppc64/x86-64 implementation in the + google/grte/v5-2.27/master which uses mixed extra parameters and static + variables.) 
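+
+    Illustration (editorial addition, not part of the upstream commit):
+    a much simplified sketch of the unnesting; the types and names
+    below are stand-ins, not the real ld.so definitions.
+
+        struct link_map { const char *l_name; };
+        struct r_scope_elem { int unused; };
+
+        /* Before: a GCC nested function captures 'map' from the
+           enclosing frame, an extension Clang does not implement.  */
+        void
+        relocate_nested (struct link_map *map)
+        {
+          const char *resolve (const char *sym)
+          { (void) sym; return map->l_name; }
+          (void) resolve ("memcpy");
+        }
+
+        /* After: the map (and, in the real code, the lookup scope)
+           travel as explicit parameters, mirroring the new
+           RESOLVE_MAP (map, scope, ...) signature, so the helper is
+           an ordinary function that any C compiler accepts.  */
+        static const char *
+        resolve_map (struct link_map *map, struct r_scope_elem *scope[],
+                     const char *sym)
+        {
+          (void) scope;
+          (void) sym;
+          return map->l_name;
+        }
+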
+ + Future simplification: + * If mips elf_machine_runtime_setup no longer needs RESOLVE_GOTSYM, + elf_machine_runtime_setup can drop the `scope` parameter. + * If TLSDESC no longer need to be in elf_machine_lazy_rel, + elf_machine_lazy_rel can drop the `scope` parameter. + + Tested on aarch64, i386, x86-64, powerpc64le, powerpc64, powerpc32, + sparc64, sparcv9, s390x, s390, hppa, ia64, armhf, alpha, and mips64. + In addition, tested build-many-glibcs.py with {arc,csky,microblaze,nios2}-linux-gnu + and riscv64-linux-gnu-rv64imafdc-lp64d. + + Reviewed-by: Adhemerval Zanella + (cherry picked from commit 490e6c62aa31a8aa5c4a059f6e646ede121edf0a) + +diff --git a/elf/dl-conflict.c b/elf/dl-conflict.c +index 31a2f90770ce2a55..5c8e51d19ae095d6 100644 +--- a/elf/dl-conflict.c ++++ b/elf/dl-conflict.c +@@ -27,20 +27,12 @@ + #include + #include "dynamic-link.h" + +-void +-_dl_resolve_conflicts (struct link_map *l, ElfW(Rela) *conflict, +- ElfW(Rela) *conflictend) +-{ +-#if ! ELF_MACHINE_NO_RELA +- if (__glibc_unlikely (GLRO(dl_debug_mask) & DL_DEBUG_RELOC)) +- _dl_debug_printf ("\nconflict processing: %s\n", DSO_FILENAME (l->l_name)); +- +- { +- /* Do the conflict relocation of the object and library GOT and other +- data. */ ++/* Used at loading time solely for prelink executable. It is not called ++ concurrently so it is be safe to defined as static. */ ++static struct link_map *resolve_conflict_map __attribute__ ((__unused__)); + + /* This macro is used as a callback from the ELF_DYNAMIC_RELOCATE code. */ +-#define RESOLVE_MAP(ref, version, flags) (*ref = NULL, NULL) ++#define RESOLVE_MAP(map, scope, ref, version, flags) (*ref = NULL, NULL) + #define RESOLVE(ref, version, flags) (*ref = NULL, 0) + #define RESOLVE_CONFLICT_FIND_MAP(map, r_offset) \ + do { \ +@@ -51,12 +43,23 @@ _dl_resolve_conflicts (struct link_map *l, ElfW(Rela) *conflict, + (map) = resolve_conflict_map; \ + } while (0) + ++#include "dynamic-link.h" ++ ++void ++_dl_resolve_conflicts (struct link_map *l, ElfW(Rela) *conflict, ++ ElfW(Rela) *conflictend) ++{ ++#if ! ELF_MACHINE_NO_RELA ++ if (__glibc_unlikely (GLRO(dl_debug_mask) & DL_DEBUG_RELOC)) ++ _dl_debug_printf ("\nconflict processing: %s\n", DSO_FILENAME (l->l_name)); ++ ++ { ++ /* Do the conflict relocation of the object and library GOT and other ++ data. */ ++ + /* Prelinking makes no sense for anything but the main namespace. */ + assert (l->l_ns == LM_ID_BASE); +- struct link_map *resolve_conflict_map __attribute__ ((__unused__)) +- = GL(dl_ns)[LM_ID_BASE]._ns_loaded; +- +-#include "dynamic-link.h" ++ resolve_conflict_map = GL(dl_ns)[LM_ID_BASE]._ns_loaded; + + /* Override these, defined in dynamic-link.h. */ + #undef CHECK_STATIC_TLS +@@ -67,8 +70,8 @@ _dl_resolve_conflicts (struct link_map *l, ElfW(Rela) *conflict, + GL(dl_num_cache_relocations) += conflictend - conflict; + + for (; conflict < conflictend; ++conflict) +- elf_machine_rela (l, conflict, NULL, NULL, (void *) conflict->r_offset, +- 0); ++ elf_machine_rela (l, NULL, conflict, NULL, NULL, ++ (void *) conflict->r_offset, 0); + } + #endif + } +diff --git a/elf/dl-reloc-static-pie.c b/elf/dl-reloc-static-pie.c +index 2fb02d727654c87d..a52ba8aeb8b573cb 100644 +--- a/elf/dl-reloc-static-pie.c ++++ b/elf/dl-reloc-static-pie.c +@@ -19,8 +19,14 @@ + #if ENABLE_STATIC_PIE + /* Mark symbols hidden in static PIE for early self relocation to work. 
*/ + # pragma GCC visibility push(hidden) ++#include + #include + #include ++ ++#include ++ ++#define STATIC_PIE_BOOTSTRAP ++#define RESOLVE_MAP(map, scope, sym, version, flags) map + #include "dynamic-link.h" + + /* Relocate static executable with PIE. */ +@@ -30,11 +36,6 @@ _dl_relocate_static_pie (void) + { + struct link_map *main_map = _dl_get_dl_main_map (); + +-# define STATIC_PIE_BOOTSTRAP +-# define BOOTSTRAP_MAP (main_map) +-# define RESOLVE_MAP(sym, version, flags) BOOTSTRAP_MAP +-# include "dynamic-link.h" +- + /* Figure out the run-time load address of static PIE. */ + main_map->l_addr = elf_machine_load_address (); + +@@ -53,12 +54,12 @@ _dl_relocate_static_pie (void) + elf_get_dynamic_info (main_map); + + # ifdef ELF_MACHINE_BEFORE_RTLD_RELOC +- ELF_MACHINE_BEFORE_RTLD_RELOC (main_map->l_info); ++ ELF_MACHINE_BEFORE_RTLD_RELOC (main_map, main_map->l_info); + # endif + + /* Relocate ourselves so we can do normal function calls and + data access using the global offset table. */ +- ELF_DYNAMIC_RELOCATE (main_map, 0, 0, 0); ++ ELF_DYNAMIC_RELOCATE (main_map, NULL, 0, 0, 0); + main_map->l_relocated = 1; + + /* Initialize _r_debug. */ +diff --git a/elf/dl-reloc.c b/elf/dl-reloc.c +index e13a672ade6d7a28..3447de7f3536cd70 100644 +--- a/elf/dl-reloc.c ++++ b/elf/dl-reloc.c +@@ -162,6 +162,32 @@ _dl_nothread_init_static_tls (struct link_map *map) + } + #endif /* !THREAD_GSCOPE_IN_TCB */ + ++/* This macro is used as a callback from the ELF_DYNAMIC_RELOCATE code. */ ++#define RESOLVE_MAP(l, scope, ref, version, r_type) \ ++ ((ELFW(ST_BIND) ((*ref)->st_info) != STB_LOCAL \ ++ && __glibc_likely (!dl_symbol_visibility_binds_local_p (*ref))) \ ++ ? ((__glibc_unlikely ((*ref) == l->l_lookup_cache.sym) \ ++ && elf_machine_type_class (r_type) == l->l_lookup_cache.type_class) \ ++ ? (bump_num_cache_relocations (), \ ++ (*ref) = l->l_lookup_cache.ret, \ ++ l->l_lookup_cache.value) \ ++ : ({ lookup_t _lr; \ ++ int _tc = elf_machine_type_class (r_type); \ ++ l->l_lookup_cache.type_class = _tc; \ ++ l->l_lookup_cache.sym = (*ref); \ ++ const struct r_found_version *v = NULL; \ ++ if ((version) != NULL && (version)->hash != 0) \ ++ v = (version); \ ++ _lr = _dl_lookup_symbol_x ((const char *) D_PTR (l, l_info[DT_STRTAB]) + (*ref)->st_name, \ ++ l, (ref), scope, v, _tc, \ ++ DL_LOOKUP_ADD_DEPENDENCY \ ++ | DL_LOOKUP_FOR_RELOCATE, NULL); \ ++ l->l_lookup_cache.ret = (*ref); \ ++ l->l_lookup_cache.value = _lr; })) \ ++ : l) ++ ++#include "dynamic-link.h" ++ + void + _dl_relocate_object (struct link_map *l, struct r_scope_elem *scope[], + int reloc_mode, int consider_profiling) +@@ -243,36 +269,7 @@ _dl_relocate_object (struct link_map *l, struct r_scope_elem *scope[], + { + /* Do the actual relocation of the object's GOT and other data. */ + +- /* String table object symbols. */ +- const char *strtab = (const void *) D_PTR (l, l_info[DT_STRTAB]); +- +- /* This macro is used as a callback from the ELF_DYNAMIC_RELOCATE code. */ +-#define RESOLVE_MAP(ref, version, r_type) \ +- ((ELFW(ST_BIND) ((*ref)->st_info) != STB_LOCAL \ +- && __glibc_likely (!dl_symbol_visibility_binds_local_p (*ref))) \ +- ? ((__builtin_expect ((*ref) == l->l_lookup_cache.sym, 0) \ +- && elf_machine_type_class (r_type) == l->l_lookup_cache.type_class) \ +- ? 
(bump_num_cache_relocations (), \ +- (*ref) = l->l_lookup_cache.ret, \ +- l->l_lookup_cache.value) \ +- : ({ lookup_t _lr; \ +- int _tc = elf_machine_type_class (r_type); \ +- l->l_lookup_cache.type_class = _tc; \ +- l->l_lookup_cache.sym = (*ref); \ +- const struct r_found_version *v = NULL; \ +- if ((version) != NULL && (version)->hash != 0) \ +- v = (version); \ +- _lr = _dl_lookup_symbol_x (strtab + (*ref)->st_name, l, (ref), \ +- scope, v, _tc, \ +- DL_LOOKUP_ADD_DEPENDENCY \ +- | DL_LOOKUP_FOR_RELOCATE, NULL); \ +- l->l_lookup_cache.ret = (*ref); \ +- l->l_lookup_cache.value = _lr; })) \ +- : l) +- +-#include "dynamic-link.h" +- +- ELF_DYNAMIC_RELOCATE (l, lazy, consider_profiling, skip_ifunc); ++ ELF_DYNAMIC_RELOCATE (l, scope, lazy, consider_profiling, skip_ifunc); + + #ifndef PROF + if (__glibc_unlikely (consider_profiling) +diff --git a/elf/do-rel.h b/elf/do-rel.h +index 321ac2b359c1028c..f441b749190c2641 100644 +--- a/elf/do-rel.h ++++ b/elf/do-rel.h +@@ -37,8 +37,8 @@ + relocations; they should be set up to call _dl_runtime_resolve, rather + than fully resolved now. */ + +-auto inline void __attribute__ ((always_inline)) +-elf_dynamic_do_Rel (struct link_map *map, ++static inline void __attribute__ ((always_inline)) ++elf_dynamic_do_Rel (struct link_map *map, struct r_scope_elem *scope[], + ElfW(Addr) reladdr, ElfW(Addr) relsize, + __typeof (((ElfW(Dyn) *) 0)->d_un.d_val) nrelative, + int lazy, int skip_ifunc) +@@ -68,13 +68,13 @@ elf_dynamic_do_Rel (struct link_map *map, + } + else + # endif +- elf_machine_lazy_rel (map, l_addr, r, skip_ifunc); ++ elf_machine_lazy_rel (map, scope, l_addr, r, skip_ifunc); + + # ifdef ELF_MACHINE_IRELATIVE + if (r2 != NULL) + for (; r2 <= end2; ++r2) + if (ELFW(R_TYPE) (r2->r_info) == ELF_MACHINE_IRELATIVE) +- elf_machine_lazy_rel (map, l_addr, r2, skip_ifunc); ++ elf_machine_lazy_rel (map, scope, l_addr, r2, skip_ifunc); + # endif + } + else +@@ -134,7 +134,7 @@ elf_dynamic_do_Rel (struct link_map *map, + #endif + + ElfW(Half) ndx = version[ELFW(R_SYM) (r->r_info)] & 0x7fff; +- elf_machine_rel (map, r, &symtab[ELFW(R_SYM) (r->r_info)], ++ elf_machine_rel (map, scope, r, &symtab[ELFW(R_SYM) (r->r_info)], + &map->l_versions[ndx], + (void *) (l_addr + r->r_offset), skip_ifunc); + } +@@ -146,7 +146,7 @@ elf_dynamic_do_Rel (struct link_map *map, + { + ElfW(Half) ndx + = version[ELFW(R_SYM) (r2->r_info)] & 0x7fff; +- elf_machine_rel (map, r2, ++ elf_machine_rel (map, scope, r2, + &symtab[ELFW(R_SYM) (r2->r_info)], + &map->l_versions[ndx], + (void *) (l_addr + r2->r_offset), +@@ -167,14 +167,14 @@ elf_dynamic_do_Rel (struct link_map *map, + } + else + # endif +- elf_machine_rel (map, r, &symtab[ELFW(R_SYM) (r->r_info)], NULL, ++ elf_machine_rel (map, scope, r, &symtab[ELFW(R_SYM) (r->r_info)], NULL, + (void *) (l_addr + r->r_offset), skip_ifunc); + + # ifdef ELF_MACHINE_IRELATIVE + if (r2 != NULL) + for (; r2 <= end2; ++r2) + if (ELFW(R_TYPE) (r2->r_info) == ELF_MACHINE_IRELATIVE) +- elf_machine_rel (map, r2, &symtab[ELFW(R_SYM) (r2->r_info)], ++ elf_machine_rel (map, scope, r2, &symtab[ELFW(R_SYM) (r2->r_info)], + NULL, (void *) (l_addr + r2->r_offset), + skip_ifunc); + # endif +diff --git a/elf/dynamic-link.h b/elf/dynamic-link.h +index 3eb24ba3a6cee40b..7cc30211649d3820 100644 +--- a/elf/dynamic-link.h ++++ b/elf/dynamic-link.h +@@ -59,31 +59,33 @@ int _dl_try_allocate_static_tls (struct link_map *map, bool optional) + copying memory, breaking the very code written to handle the + unaligned cases. */ + # if ! 
ELF_MACHINE_NO_REL +-auto inline void __attribute__((always_inline)) +-elf_machine_rel (struct link_map *map, const ElfW(Rel) *reloc, +- const ElfW(Sym) *sym, const struct r_found_version *version, ++static inline void __attribute__((always_inline)) ++elf_machine_rel (struct link_map *map, struct r_scope_elem *scope[], ++ const ElfW(Rel) *reloc, const ElfW(Sym) *sym, ++ const struct r_found_version *version, + void *const reloc_addr, int skip_ifunc); +-auto inline void __attribute__((always_inline)) ++static inline void __attribute__((always_inline)) + elf_machine_rel_relative (ElfW(Addr) l_addr, const ElfW(Rel) *reloc, + void *const reloc_addr); + # endif + # if ! ELF_MACHINE_NO_RELA +-auto inline void __attribute__((always_inline)) +-elf_machine_rela (struct link_map *map, const ElfW(Rela) *reloc, +- const ElfW(Sym) *sym, const struct r_found_version *version, +- void *const reloc_addr, int skip_ifunc); +-auto inline void __attribute__((always_inline)) ++static inline void __attribute__((always_inline)) ++elf_machine_rela (struct link_map *map, struct r_scope_elem *scope[], ++ const ElfW(Rela) *reloc, const ElfW(Sym) *sym, ++ const struct r_found_version *version, void *const reloc_addr, ++ int skip_ifunc); ++static inline void __attribute__((always_inline)) + elf_machine_rela_relative (ElfW(Addr) l_addr, const ElfW(Rela) *reloc, + void *const reloc_addr); + # endif + # if ELF_MACHINE_NO_RELA || defined ELF_MACHINE_PLT_REL +-auto inline void __attribute__((always_inline)) +-elf_machine_lazy_rel (struct link_map *map, ++static inline void __attribute__((always_inline)) ++elf_machine_lazy_rel (struct link_map *map, struct r_scope_elem *scope[], + ElfW(Addr) l_addr, const ElfW(Rel) *reloc, + int skip_ifunc); + # else +-auto inline void __attribute__((always_inline)) +-elf_machine_lazy_rel (struct link_map *map, ++static inline void __attribute__((always_inline)) ++elf_machine_lazy_rel (struct link_map *map, struct r_scope_elem *scope[], + ElfW(Addr) l_addr, const ElfW(Rela) *reloc, + int skip_ifunc); + # endif +@@ -114,7 +116,7 @@ elf_machine_lazy_rel (struct link_map *map, + consumes precisely the very end of the DT_REL*, or DT_JMPREL and DT_REL* + are completely separate and there is a gap between them. */ + +-# define _ELF_DYNAMIC_DO_RELOC(RELOC, reloc, map, do_lazy, skip_ifunc, test_rel) \ ++# define _ELF_DYNAMIC_DO_RELOC(RELOC, reloc, map, scope, do_lazy, skip_ifunc, test_rel) \ + do { \ + struct { ElfW(Addr) start, size; \ + __typeof (((ElfW(Dyn) *) 0)->d_un.d_val) nrelative; int lazy; } \ +@@ -152,18 +154,18 @@ elf_machine_lazy_rel (struct link_map *map, + } \ + \ + if (ELF_DURING_STARTUP) \ +- elf_dynamic_do_##reloc ((map), ranges[0].start, ranges[0].size, \ +- ranges[0].nrelative, 0, skip_ifunc); \ ++ elf_dynamic_do_##reloc ((map), scope, ranges[0].start, ranges[0].size, \ ++ ranges[0].nrelative, 0, skip_ifunc); \ + else \ + { \ + int ranges_index; \ + for (ranges_index = 0; ranges_index < 2; ++ranges_index) \ +- elf_dynamic_do_##reloc ((map), \ ++ elf_dynamic_do_##reloc ((map), scope, \ + ranges[ranges_index].start, \ + ranges[ranges_index].size, \ + ranges[ranges_index].nrelative, \ + ranges[ranges_index].lazy, \ +- skip_ifunc); \ ++ skip_ifunc); \ + } \ + } while (0) + +@@ -175,29 +177,29 @@ elf_machine_lazy_rel (struct link_map *map, + + # if ! 
ELF_MACHINE_NO_REL + # include "do-rel.h" +-# define ELF_DYNAMIC_DO_REL(map, lazy, skip_ifunc) \ +- _ELF_DYNAMIC_DO_RELOC (REL, Rel, map, lazy, skip_ifunc, _ELF_CHECK_REL) ++# define ELF_DYNAMIC_DO_REL(map, scope, lazy, skip_ifunc) \ ++ _ELF_DYNAMIC_DO_RELOC (REL, Rel, map, scope, lazy, skip_ifunc, _ELF_CHECK_REL) + # else +-# define ELF_DYNAMIC_DO_REL(map, lazy, skip_ifunc) /* Nothing to do. */ ++# define ELF_DYNAMIC_DO_REL(map, scope, lazy, skip_ifunc) /* Nothing to do. */ + # endif + + # if ! ELF_MACHINE_NO_RELA + # define DO_RELA + # include "do-rel.h" +-# define ELF_DYNAMIC_DO_RELA(map, lazy, skip_ifunc) \ +- _ELF_DYNAMIC_DO_RELOC (RELA, Rela, map, lazy, skip_ifunc, _ELF_CHECK_REL) ++# define ELF_DYNAMIC_DO_RELA(map, scope, lazy, skip_ifunc) \ ++ _ELF_DYNAMIC_DO_RELOC (RELA, Rela, map, scope, lazy, skip_ifunc, _ELF_CHECK_REL) + # else +-# define ELF_DYNAMIC_DO_RELA(map, lazy, skip_ifunc) /* Nothing to do. */ ++# define ELF_DYNAMIC_DO_RELA(map, scope, lazy, skip_ifunc) /* Nothing to do. */ + # endif + + /* This can't just be an inline function because GCC is too dumb + to inline functions containing inlines themselves. */ +-# define ELF_DYNAMIC_RELOCATE(map, lazy, consider_profile, skip_ifunc) \ ++# define ELF_DYNAMIC_RELOCATE(map, scope, lazy, consider_profile, skip_ifunc) \ + do { \ +- int edr_lazy = elf_machine_runtime_setup ((map), (lazy), \ ++ int edr_lazy = elf_machine_runtime_setup ((map), (scope), (lazy), \ + (consider_profile)); \ +- ELF_DYNAMIC_DO_REL ((map), edr_lazy, skip_ifunc); \ +- ELF_DYNAMIC_DO_RELA ((map), edr_lazy, skip_ifunc); \ ++ ELF_DYNAMIC_DO_REL ((map), (scope), edr_lazy, skip_ifunc); \ ++ ELF_DYNAMIC_DO_RELA ((map), (scope), edr_lazy, skip_ifunc); \ + } while (0) + + #endif +diff --git a/elf/get-dynamic-info.h b/elf/get-dynamic-info.h +index 4aa2058abf6443c9..15c316b38c05a90c 100644 +--- a/elf/get-dynamic-info.h ++++ b/elf/get-dynamic-info.h +@@ -16,18 +16,15 @@ + License along with the GNU C Library; if not, see + . */ + +-/* This file is included multiple times and therefore lacks a header +- file inclusion guard. */ ++/* Populate dynamic tags in l_info. */ ++ ++#ifndef _GET_DYNAMIC_INFO_H ++#define _GET_DYNAMIC_INFO_H + + #include + #include + +-#ifndef RESOLVE_MAP +-static +-#else +-auto +-#endif +-inline void __attribute__ ((unused, always_inline)) ++static inline void __attribute__ ((unused, always_inline)) + elf_get_dynamic_info (struct link_map *l) + { + #if __ELF_NATIVE_CLASS == 32 +@@ -165,3 +162,5 @@ elf_get_dynamic_info (struct link_map *l) + info[DT_RPATH] = NULL; + #endif + } ++ ++#endif +diff --git a/elf/rtld.c b/elf/rtld.c +index 84eac9a8df7125a6..ee45657aeac14f3c 100644 +--- a/elf/rtld.c ++++ b/elf/rtld.c +@@ -502,13 +502,9 @@ _dl_start_final (void *arg, struct dl_start_final_info *info) + return start_addr; + } + +-static ElfW(Addr) __attribute_used__ +-_dl_start (void *arg) +-{ + #ifdef DONT_USE_BOOTSTRAP_MAP + # define bootstrap_map GL(dl_rtld_map) + #else +- struct dl_start_final_info info; + # define bootstrap_map info.l + #endif + +@@ -517,13 +513,16 @@ _dl_start (void *arg) + Since ld.so must not have any undefined symbols the result + is trivial: always the map of ld.so itself. 
*/ + #define RTLD_BOOTSTRAP +-#define BOOTSTRAP_MAP (&bootstrap_map) +-#define RESOLVE_MAP(sym, version, flags) BOOTSTRAP_MAP ++#define RESOLVE_MAP(map, scope, sym, version, flags) map + #include "dynamic-link.h" + ++static ElfW(Addr) __attribute_used__ ++_dl_start (void *arg) ++{ + #ifdef DONT_USE_BOOTSTRAP_MAP + rtld_timer_start (&start_time); + #else ++ struct dl_start_final_info info; + rtld_timer_start (&info.start_time); + #endif + +@@ -557,7 +556,7 @@ _dl_start (void *arg) + #endif + + #ifdef ELF_MACHINE_BEFORE_RTLD_RELOC +- ELF_MACHINE_BEFORE_RTLD_RELOC (bootstrap_map.l_info); ++ ELF_MACHINE_BEFORE_RTLD_RELOC (&bootstrap_map, bootstrap_map.l_info); + #endif + + if (bootstrap_map.l_addr || ! bootstrap_map.l_info[VALIDX(DT_GNU_PRELINKED)]) +@@ -565,7 +564,7 @@ _dl_start (void *arg) + /* Relocate ourselves so we can do normal function calls and + data access using the global offset table. */ + +- ELF_DYNAMIC_RELOCATE (&bootstrap_map, 0, 0, 0); ++ ELF_DYNAMIC_RELOCATE (&bootstrap_map, NULL, 0, 0, 0); + } + bootstrap_map.l_relocated = 1; + +diff --git a/sysdeps/aarch64/dl-machine.h b/sysdeps/aarch64/dl-machine.h +index d29d827ab32a78ee..34c0790b893a529b 100644 +--- a/sysdeps/aarch64/dl-machine.h ++++ b/sysdeps/aarch64/dl-machine.h +@@ -65,7 +65,8 @@ elf_machine_load_address (void) + entries will jump to the on-demand fixup code in dl-runtime.c. */ + + static inline int __attribute__ ((unused)) +-elf_machine_runtime_setup (struct link_map *l, int lazy, int profile) ++elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[], ++ int lazy, int profile) + { + if (l->l_info[DT_JMPREL] && lazy) + { +@@ -243,10 +244,11 @@ elf_machine_plt_value (struct link_map *map, + + #ifdef RESOLVE_MAP + +-auto inline void ++static inline void + __attribute__ ((always_inline)) +-elf_machine_rela (struct link_map *map, const ElfW(Rela) *reloc, +- const ElfW(Sym) *sym, const struct r_found_version *version, ++elf_machine_rela (struct link_map *map, struct r_scope_elem *scope[], ++ const ElfW(Rela) *reloc, const ElfW(Sym) *sym, ++ const struct r_found_version *version, + void *const reloc_addr_arg, int skip_ifunc) + { + ElfW(Addr) *const reloc_addr = reloc_addr_arg; +@@ -259,7 +261,8 @@ elf_machine_rela (struct link_map *map, const ElfW(Rela) *reloc, + else + { + const ElfW(Sym) *const refsym = sym; +- struct link_map *sym_map = RESOLVE_MAP (&sym, version, r_type); ++ struct link_map *sym_map = RESOLVE_MAP (map, scope, &sym, version, ++ r_type); + ElfW(Addr) value = SYMBOL_ADDRESS (sym_map, sym, true); + + if (sym != NULL +@@ -383,9 +386,9 @@ elf_machine_rela_relative (ElfW(Addr) l_addr, + *reloc_addr = l_addr + reloc->r_addend; + } + +-inline void ++static inline void + __attribute__ ((always_inline)) +-elf_machine_lazy_rel (struct link_map *map, ++elf_machine_lazy_rel (struct link_map *map, struct r_scope_elem *scope[], + ElfW(Addr) l_addr, + const ElfW(Rela) *reloc, + int skip_ifunc) +@@ -412,7 +415,7 @@ elf_machine_lazy_rel (struct link_map *map, + (const void *)D_PTR (map, l_info[VERSYMIDX (DT_VERSYM)]); + version = &map->l_versions[vernum[symndx] & 0x7fff]; + } +- elf_machine_rela (map, reloc, sym, version, reloc_addr, ++ elf_machine_rela (map, scope, reloc, sym, version, reloc_addr, + skip_ifunc); + return; + } +@@ -439,7 +442,8 @@ elf_machine_lazy_rel (struct link_map *map, + + /* Always initialize TLS descriptors completely, because lazy + initialization requires synchronization at every TLS access. 
*/ +- elf_machine_rela (map, reloc, sym, version, reloc_addr, skip_ifunc); ++ elf_machine_rela (map, scope, reloc, sym, version, reloc_addr, ++ skip_ifunc); + } + else if (__glibc_unlikely (r_type == AARCH64_R(IRELATIVE))) + { +diff --git a/sysdeps/alpha/dl-machine.h b/sysdeps/alpha/dl-machine.h +index 2cd2213d9ab25287..66e1db524bb378f6 100644 +--- a/sysdeps/alpha/dl-machine.h ++++ b/sysdeps/alpha/dl-machine.h +@@ -70,7 +70,8 @@ elf_machine_load_address (void) + entries will jump to the on-demand fixup code in dl-runtime.c. */ + + static inline int +-elf_machine_runtime_setup (struct link_map *map, int lazy, int profile) ++elf_machine_runtime_setup (struct link_map *map, struct r_scope_elem *scope[], ++ int lazy, int profile) + { + extern char _dl_runtime_resolve_new[] attribute_hidden; + extern char _dl_runtime_profile_new[] attribute_hidden; +@@ -361,9 +362,9 @@ elf_machine_plt_value (struct link_map *map, const Elf64_Rela *reloc, + + /* Perform the relocation specified by RELOC and SYM (which is fully resolved). + MAP is the object containing the reloc. */ +-auto inline void ++static inline void + __attribute__ ((always_inline)) +-elf_machine_rela (struct link_map *map, ++elf_machine_rela (struct link_map *map, struct r_scope_elem *scope[], + const Elf64_Rela *reloc, + const Elf64_Sym *sym, + const struct r_found_version *version, +@@ -411,7 +412,8 @@ elf_machine_rela (struct link_map *map, + return; + else + { +- struct link_map *sym_map = RESOLVE_MAP (&sym, version, r_type); ++ struct link_map *sym_map = RESOLVE_MAP (map, scope, &sym, version, ++ r_type); + Elf64_Addr sym_value; + Elf64_Addr sym_raw_value; + +@@ -489,7 +491,7 @@ elf_machine_rela (struct link_map *map, + can be skipped. */ + #define ELF_MACHINE_REL_RELATIVE 1 + +-auto inline void ++static inline void + __attribute__ ((always_inline)) + elf_machine_rela_relative (Elf64_Addr l_addr, const Elf64_Rela *reloc, + void *const reloc_addr_arg) +@@ -506,9 +508,9 @@ elf_machine_rela_relative (Elf64_Addr l_addr, const Elf64_Rela *reloc, + memcpy (reloc_addr_arg, &reloc_addr_val, 8); + } + +-auto inline void ++static inline void + __attribute__ ((always_inline)) +-elf_machine_lazy_rel (struct link_map *map, ++elf_machine_lazy_rel (struct link_map *map, struct r_scope_elem *scope[], + Elf64_Addr l_addr, const Elf64_Rela *reloc, + int skip_ifunc) + { +diff --git a/sysdeps/arc/dl-machine.h b/sysdeps/arc/dl-machine.h +index e6ce7f0ff6d9ac34..4b64ffec256b7f3b 100644 +--- a/sysdeps/arc/dl-machine.h ++++ b/sysdeps/arc/dl-machine.h +@@ -122,7 +122,8 @@ elf_machine_load_address (void) + + static inline int + __attribute__ ((always_inline)) +-elf_machine_runtime_setup (struct link_map *l, int lazy, int profile) ++elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[], ++ int lazy, int profile) + { + extern void _dl_runtime_resolve (void); + +@@ -228,10 +229,11 @@ elf_machine_fixup_plt (struct link_map *map, lookup_t t, + + #ifdef RESOLVE_MAP + +-inline void ++static inline void + __attribute__ ((always_inline)) +-elf_machine_rela (struct link_map *map, const ElfW(Rela) *reloc, +- const ElfW(Sym) *sym, const struct r_found_version *version, ++elf_machine_rela (struct link_map *map, struct r_scope_elem *scope[], ++ const ElfW(Rela) *reloc, const ElfW(Sym) *sym, ++ const struct r_found_version *version, + void *const reloc_addr_arg, int skip_ifunc) + { + ElfW(Addr) r_info = reloc->r_info; +@@ -245,7 +247,8 @@ elf_machine_rela (struct link_map *map, const ElfW(Rela) *reloc, + else + { + const ElfW(Sym) *const refsym = sym; 
+- struct link_map *sym_map = RESOLVE_MAP (&sym, version, r_type); ++ struct link_map *sym_map = RESOLVE_MAP (map, scope, &sym, version, ++ r_type); + ElfW(Addr) value = SYMBOL_ADDRESS (sym_map, sym, true); + + switch (r_type) +@@ -326,8 +329,9 @@ elf_machine_rela_relative (ElfW(Addr) l_addr, const ElfW(Rela) *reloc, + + inline void + __attribute__ ((always_inline)) +-elf_machine_lazy_rel (struct link_map *map, ElfW(Addr) l_addr, +- const ElfW(Rela) *reloc, int skip_ifunc) ++elf_machine_lazy_rel (struct link_map *map, struct r_scope_elem *scope[], ++ ElfW(Addr) l_addr, const ElfW(Rela) *reloc, ++ int skip_ifunc) + { + ElfW(Addr) *const reloc_addr = (void *) (l_addr + reloc->r_offset); + const unsigned int r_type = ELFW (R_TYPE) (reloc->r_info); +diff --git a/sysdeps/arm/dl-machine.h b/sysdeps/arm/dl-machine.h +index ff5e09e207f7986b..7e6761bbe87540d5 100644 +--- a/sysdeps/arm/dl-machine.h ++++ b/sysdeps/arm/dl-machine.h +@@ -84,7 +84,8 @@ elf_machine_load_address (void) + entries will jump to the on-demand fixup code in dl-runtime.c. */ + + static inline int __attribute__ ((unused)) +-elf_machine_runtime_setup (struct link_map *l, int lazy, int profile) ++elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[], ++ int lazy, int profile) + { + Elf32_Addr *got; + extern void _dl_runtime_resolve (Elf32_Word); +@@ -303,7 +304,7 @@ elf_machine_plt_value (struct link_map *map, const Elf32_Rel *reloc, + + #ifdef RESOLVE_MAP + /* Handle a PC24 reloc, including the out-of-range case. */ +-auto void ++static void + relocate_pc24 (struct link_map *map, Elf32_Addr value, + Elf32_Addr *const reloc_addr, Elf32_Sword addend) + { +@@ -357,10 +358,11 @@ relocate_pc24 (struct link_map *map, Elf32_Addr value, + /* Perform the relocation specified by RELOC and SYM (which is fully resolved). + MAP is the object containing the reloc. 
*/ + +-auto inline void ++static inline void + __attribute__ ((always_inline)) +-elf_machine_rel (struct link_map *map, const Elf32_Rel *reloc, +- const Elf32_Sym *sym, const struct r_found_version *version, ++elf_machine_rel (struct link_map *map, struct r_scope_elem *scope[], ++ const Elf32_Rel *reloc, const Elf32_Sym *sym, ++ const struct r_found_version *version, + void *const reloc_addr_arg, int skip_ifunc) + { + Elf32_Addr *const reloc_addr = reloc_addr_arg; +@@ -391,7 +393,8 @@ elf_machine_rel (struct link_map *map, const Elf32_Rel *reloc, + #endif + { + const Elf32_Sym *const refsym = sym; +- struct link_map *sym_map = RESOLVE_MAP (&sym, version, r_type); ++ struct link_map *sym_map = RESOLVE_MAP (map, scope, &sym, version, ++ r_type); + Elf32_Addr value = SYMBOL_ADDRESS (sym_map, sym, true); + + if (sym != NULL +@@ -535,10 +538,11 @@ elf_machine_rel (struct link_map *map, const Elf32_Rel *reloc, + } + + # ifndef RTLD_BOOTSTRAP +-auto inline void ++static inline void + __attribute__ ((always_inline)) +-elf_machine_rela (struct link_map *map, const Elf32_Rela *reloc, +- const Elf32_Sym *sym, const struct r_found_version *version, ++elf_machine_rela (struct link_map *map, struct r_scope_elem *scope[], ++ const Elf32_Rela *reloc, const Elf32_Sym *sym, ++ const struct r_found_version *version, + void *const reloc_addr_arg, int skip_ifunc) + { + Elf32_Addr *const reloc_addr = reloc_addr_arg; +@@ -553,7 +557,7 @@ elf_machine_rela (struct link_map *map, const Elf32_Rela *reloc, + # ifndef RESOLVE_CONFLICT_FIND_MAP + const Elf32_Sym *const refsym = sym; + # endif +- struct link_map *sym_map = RESOLVE_MAP (&sym, version, r_type); ++ struct link_map *sym_map = RESOLVE_MAP (map, scope, &sym, version, r_type); + Elf32_Addr value = SYMBOL_ADDRESS (sym_map, sym, true); + + if (sym != NULL +@@ -628,7 +632,7 @@ elf_machine_rela (struct link_map *map, const Elf32_Rela *reloc, + } + # endif + +-auto inline void ++static inline void + __attribute__ ((always_inline)) + elf_machine_rel_relative (Elf32_Addr l_addr, const Elf32_Rel *reloc, + void *const reloc_addr_arg) +@@ -638,7 +642,7 @@ elf_machine_rel_relative (Elf32_Addr l_addr, const Elf32_Rel *reloc, + } + + # ifndef RTLD_BOOTSTRAP +-auto inline void ++static inline void + __attribute__ ((always_inline)) + elf_machine_rela_relative (Elf32_Addr l_addr, const Elf32_Rela *reloc, + void *const reloc_addr_arg) +@@ -648,9 +652,9 @@ elf_machine_rela_relative (Elf32_Addr l_addr, const Elf32_Rela *reloc, + } + # endif + +-auto inline void ++static inline void + __attribute__ ((always_inline)) +-elf_machine_lazy_rel (struct link_map *map, ++elf_machine_lazy_rel (struct link_map *map, struct r_scope_elem *scope[], + Elf32_Addr l_addr, const Elf32_Rel *reloc, + int skip_ifunc) + { +@@ -680,7 +684,7 @@ elf_machine_lazy_rel (struct link_map *map, + + /* Always initialize TLS descriptors completely, because lazy + initialization requires synchronization at every TLS access. */ +- elf_machine_rel (map, reloc, sym, version, reloc_addr, skip_ifunc); ++ elf_machine_rel (map, scope, reloc, sym, version, reloc_addr, skip_ifunc); + } + else + _dl_reloc_bad_type (map, r_type, 1); +diff --git a/sysdeps/csky/dl-machine.h b/sysdeps/csky/dl-machine.h +index b08f06d74ca6f8d1..ec22f875772b1291 100644 +--- a/sysdeps/csky/dl-machine.h ++++ b/sysdeps/csky/dl-machine.h +@@ -58,7 +58,8 @@ elf_machine_load_address (void) + entries will jump to the on-demand fixup code in dl-runtime.c. 
*/ + + static inline int __attribute__ ((always_inline)) +-elf_machine_runtime_setup (struct link_map *l, int lazy, int profile) ++elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[], ++ int lazy, int profile) + { + Elf32_Addr *got; + extern void _dl_runtime_resolve (Elf32_Word); +@@ -215,9 +216,10 @@ elf_machine_plt_value (struct link_map *map, const Elf32_Rela *reloc, + /* Perform the relocation specified by RELOC and SYM (which is fully resolved). + MAP is the object containing the reloc. */ + +-auto inline void __attribute__ ((unused, always_inline)) +-elf_machine_rela (struct link_map *map, const Elf32_Rela *reloc, +- const Elf32_Sym *sym, const struct r_found_version *version, ++static inline void __attribute__ ((unused, always_inline)) ++elf_machine_rela (struct link_map *map, struct r_scope_elem *scope[], ++ const Elf32_Rela *reloc, const Elf32_Sym *sym, ++ const struct r_found_version *version, + void *const reloc_addr_arg, int skip_ifunc) + { + Elf32_Addr *const reloc_addr = reloc_addr_arg; +@@ -230,7 +232,8 @@ elf_machine_rela (struct link_map *map, const Elf32_Rela *reloc, + else + { + const Elf32_Sym *const refsym = sym; +- struct link_map *sym_map = RESOLVE_MAP (&sym, version, r_type); ++ struct link_map *sym_map = RESOLVE_MAP (map, scope, &sym, version, ++ r_type); + ElfW(Addr) value = SYMBOL_ADDRESS (sym_map, sym, true); + opcode16_addr = (unsigned short *)reloc_addr; + +@@ -331,7 +334,7 @@ elf_machine_rela (struct link_map *map, const Elf32_Rela *reloc, + } + } + +-auto inline void __attribute__ ((unused, always_inline)) ++static inline void __attribute__ ((unused, always_inline)) + elf_machine_rela_relative (Elf32_Addr l_addr, const Elf32_Rela *reloc, + void *const reloc_addr_arg) + { +@@ -339,8 +342,8 @@ elf_machine_rela_relative (Elf32_Addr l_addr, const Elf32_Rela *reloc, + *reloc_addr = l_addr + reloc->r_addend; + } + +-auto inline void __attribute__ ((unused, always_inline)) +-elf_machine_lazy_rel (struct link_map *map, ++static inline void __attribute__ ((unused, always_inline)) ++elf_machine_lazy_rel (struct link_map *map, struct r_scope_elem *scope[], + Elf32_Addr l_addr, const Elf32_Rela *reloc, + int skip_ifunc) + { +diff --git a/sysdeps/hppa/dl-machine.h b/sysdeps/hppa/dl-machine.h +index 24f0f47d8f1e25cd..088931f67065250c 100644 +--- a/sysdeps/hppa/dl-machine.h ++++ b/sysdeps/hppa/dl-machine.h +@@ -70,8 +70,8 @@ __hppa_init_bootstrap_fdesc_table (struct link_map *map) + map->l_mach.fptr_table = boot_table; + } + +-#define ELF_MACHINE_BEFORE_RTLD_RELOC(dynamic_info) \ +- __hppa_init_bootstrap_fdesc_table (BOOTSTRAP_MAP); \ ++#define ELF_MACHINE_BEFORE_RTLD_RELOC(map, dynamic_info) \ ++ __hppa_init_bootstrap_fdesc_table (map); \ + _dl_fptr_init(); + + /* Return nonzero iff ELF header is compatible with the running host. */ +@@ -182,7 +182,8 @@ elf_machine_main_map (void) + entries will jump to the on-demand fixup code in dl-runtime.c. 
*/ + + static inline int +-elf_machine_runtime_setup (struct link_map *l, int lazy, int profile) ++elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[], ++ int lazy, int profile) + { + Elf32_Addr *got = NULL; + Elf32_Addr l_addr, iplt, jmprel, end_jmprel, r_type, r_sym; +@@ -564,8 +565,8 @@ dl_platform_init (void) + ( (((as14) & 0x1fff) << 1) \ + | (((as14) & 0x2000) >> 13)) + +-auto void __attribute__((always_inline)) +-elf_machine_rela (struct link_map *map, ++static void __attribute__((always_inline)) ++elf_machine_rela (struct link_map *map, struct r_scope_elem *scope[], + const Elf32_Rela *reloc, + const Elf32_Sym *sym, + const struct r_found_version *version, +@@ -594,11 +595,9 @@ elf_machine_rela (struct link_map *map, + zeros, and an all zero Elf32_Sym has a binding of STB_LOCAL.) + See RESOLVE_MAP definition in elf/dl-reloc.c */ + # ifdef RTLD_BOOTSTRAP +- /* RESOLVE_MAP in rtld.c doesn't have the local sym test. */ +- sym_map = (ELF32_ST_BIND (sym->st_info) != STB_LOCAL +- ? RESOLVE_MAP (&sym, version, r_type) : map); ++ sym_map = map; + # else +- sym_map = RESOLVE_MAP (&sym, version, r_type); ++ sym_map = RESOLVE_MAP (map, scope, &sym, version, r_type); + # endif + + if (sym_map) +@@ -756,7 +755,7 @@ elf_machine_rela (struct link_map *map, + + /* hppa doesn't have an R_PARISC_RELATIVE reloc, but uses relocs with + ELF32_R_SYM (info) == 0 for a similar purpose. */ +-auto void __attribute__((always_inline)) ++static void __attribute__((always_inline)) + elf_machine_rela_relative (Elf32_Addr l_addr, + const Elf32_Rela *reloc, + void *const reloc_addr_arg) +@@ -809,8 +808,8 @@ elf_machine_rela_relative (Elf32_Addr l_addr, + *reloc_addr = value; + } + +-auto void __attribute__((always_inline)) +-elf_machine_lazy_rel (struct link_map *map, ++static void __attribute__((always_inline)) ++elf_machine_lazy_rel (struct link_map *map, struct r_scope_elem *scope[], + Elf32_Addr l_addr, const Elf32_Rela *reloc, + int skip_ifunc) + { +diff --git a/sysdeps/i386/dl-machine.h b/sysdeps/i386/dl-machine.h +index 590b41d8d7e35005..78ce890c0ff333ca 100644 +--- a/sysdeps/i386/dl-machine.h ++++ b/sysdeps/i386/dl-machine.h +@@ -61,7 +61,8 @@ elf_machine_load_address (void) + entries will jump to the on-demand fixup code in dl-runtime.c. */ + + static inline int __attribute__ ((unused, always_inline)) +-elf_machine_runtime_setup (struct link_map *l, int lazy, int profile) ++elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[], ++ int lazy, int profile) + { + Elf32_Addr *got; + extern void _dl_runtime_resolve (Elf32_Word) attribute_hidden; +@@ -291,9 +292,10 @@ elf_machine_plt_value (struct link_map *map, const Elf32_Rel *reloc, + /* Perform the relocation specified by RELOC and SYM (which is fully resolved). + MAP is the object containing the reloc. 
*/ + +-auto inline void ++static inline void + __attribute ((always_inline)) +-elf_machine_rel (struct link_map *map, const Elf32_Rel *reloc, ++elf_machine_rel (struct link_map *map, struct r_scope_elem *scope[], ++ const Elf32_Rel *reloc, + const Elf32_Sym *sym, const struct r_found_version *version, + void *const reloc_addr_arg, int skip_ifunc) + { +@@ -327,7 +329,8 @@ elf_machine_rel (struct link_map *map, const Elf32_Rel *reloc, + # ifndef RTLD_BOOTSTRAP + const Elf32_Sym *const refsym = sym; + # endif +- struct link_map *sym_map = RESOLVE_MAP (&sym, version, r_type); ++ struct link_map *sym_map = RESOLVE_MAP (map, scope, &sym, version, ++ r_type); + Elf32_Addr value = SYMBOL_ADDRESS (sym_map, sym, true); + + if (sym != NULL +@@ -498,10 +501,11 @@ and creates an unsatisfiable circular dependency.\n", + } + + # ifndef RTLD_BOOTSTRAP +-auto inline void ++static inline void + __attribute__ ((always_inline)) +-elf_machine_rela (struct link_map *map, const Elf32_Rela *reloc, +- const Elf32_Sym *sym, const struct r_found_version *version, ++elf_machine_rela (struct link_map *map, struct r_scope_elem *scope[], ++ const Elf32_Rela *reloc, const Elf32_Sym *sym, ++ const struct r_found_version *version, + void *const reloc_addr_arg, int skip_ifunc) + { + Elf32_Addr *const reloc_addr = reloc_addr_arg; +@@ -514,7 +518,8 @@ elf_machine_rela (struct link_map *map, const Elf32_Rela *reloc, + # ifndef RESOLVE_CONFLICT_FIND_MAP + const Elf32_Sym *const refsym = sym; + # endif +- struct link_map *sym_map = RESOLVE_MAP (&sym, version, r_type); ++ struct link_map *sym_map = RESOLVE_MAP (map, scope, &sym, version, ++ r_type); + Elf32_Addr value = SYMBOL_ADDRESS (sym_map, sym, true); + + if (sym != NULL +@@ -647,7 +652,7 @@ elf_machine_rela (struct link_map *map, const Elf32_Rela *reloc, + } + # endif /* !RTLD_BOOTSTRAP */ + +-auto inline void ++static inline void + __attribute ((always_inline)) + elf_machine_rel_relative (Elf32_Addr l_addr, const Elf32_Rel *reloc, + void *const reloc_addr_arg) +@@ -658,7 +663,7 @@ elf_machine_rel_relative (Elf32_Addr l_addr, const Elf32_Rel *reloc, + } + + # ifndef RTLD_BOOTSTRAP +-auto inline void ++static inline void + __attribute__ ((always_inline)) + elf_machine_rela_relative (Elf32_Addr l_addr, const Elf32_Rela *reloc, + void *const reloc_addr_arg) +@@ -668,9 +673,9 @@ elf_machine_rela_relative (Elf32_Addr l_addr, const Elf32_Rela *reloc, + } + # endif /* !RTLD_BOOTSTRAP */ + +-auto inline void ++static inline void + __attribute__ ((always_inline)) +-elf_machine_lazy_rel (struct link_map *map, ++elf_machine_lazy_rel (struct link_map *map, struct r_scope_elem *scope[], + Elf32_Addr l_addr, const Elf32_Rel *reloc, + int skip_ifunc) + { +@@ -705,13 +710,13 @@ elf_machine_lazy_rel (struct link_map *map, + const ElfW(Half) *const version = + (const void *) D_PTR (map, l_info[VERSYMIDX (DT_VERSYM)]); + ElfW(Half) ndx = version[ELFW(R_SYM) (r->r_info)] & 0x7fff; +- elf_machine_rel (map, r, &symtab[ELFW(R_SYM) (r->r_info)], ++ elf_machine_rel (map, scope, r, &symtab[ELFW(R_SYM) (r->r_info)], + &map->l_versions[ndx], + (void *) (l_addr + r->r_offset), skip_ifunc); + } + # ifndef RTLD_BOOTSTRAP + else +- elf_machine_rel (map, r, &symtab[ELFW(R_SYM) (r->r_info)], NULL, ++ elf_machine_rel (map, scope, r, &symtab[ELFW(R_SYM) (r->r_info)], NULL, + (void *) (l_addr + r->r_offset), skip_ifunc); + # endif + } +@@ -728,9 +733,9 @@ elf_machine_lazy_rel (struct link_map *map, + + # ifndef RTLD_BOOTSTRAP + +-auto inline void ++static inline void + __attribute__ ((always_inline)) 
+-elf_machine_lazy_rela (struct link_map *map, ++elf_machine_lazy_rela (struct link_map *map, struct r_scope_elem *scope[], + Elf32_Addr l_addr, const Elf32_Rela *reloc, + int skip_ifunc) + { +@@ -754,7 +759,8 @@ elf_machine_lazy_rela (struct link_map *map, + + /* Always initialize TLS descriptors completely at load time, in + case static TLS is allocated for it that requires locking. */ +- elf_machine_rela (map, reloc, sym, version, reloc_addr, skip_ifunc); ++ elf_machine_rela (map, scope, reloc, sym, version, reloc_addr, ++ skip_ifunc); + } + else if (__glibc_unlikely (r_type == R_386_IRELATIVE)) + { +diff --git a/sysdeps/ia64/dl-machine.h b/sysdeps/ia64/dl-machine.h +index 4403e7767af83546..2217d0b556c17683 100644 +--- a/sysdeps/ia64/dl-machine.h ++++ b/sysdeps/ia64/dl-machine.h +@@ -44,8 +44,8 @@ __ia64_init_bootstrap_fdesc_table (struct link_map *map) + map->l_mach.fptr_table = boot_table; + } + +-#define ELF_MACHINE_BEFORE_RTLD_RELOC(dynamic_info) \ +- __ia64_init_bootstrap_fdesc_table (BOOTSTRAP_MAP); ++#define ELF_MACHINE_BEFORE_RTLD_RELOC(map, dynamic_info) \ ++ __ia64_init_bootstrap_fdesc_table (map); + + /* Return nonzero iff ELF header is compatible with the running host. */ + static inline int __attribute__ ((unused)) +@@ -98,7 +98,8 @@ elf_machine_load_address (void) + entries will jump to the on-demand fixup code in dl-runtime.c. */ + + static inline int __attribute__ ((unused, always_inline)) +-elf_machine_runtime_setup (struct link_map *l, int lazy, int profile) ++elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[], ++ int lazy, int profile) + { + extern void _dl_runtime_resolve (void); + extern void _dl_runtime_profile (void); +@@ -371,9 +372,9 @@ elf_machine_plt_value (struct link_map *map, const Elf64_Rela *reloc, + + /* Perform the relocation specified by RELOC and SYM (which is fully + resolved). MAP is the object containing the reloc. */ +-auto inline void ++static inline void + __attribute ((always_inline)) +-elf_machine_rela (struct link_map *map, ++elf_machine_rela (struct link_map *map, struct r_scope_elem *scope[], + const Elf64_Rela *reloc, + const Elf64_Sym *sym, + const struct r_found_version *version, +@@ -414,10 +415,11 @@ elf_machine_rela (struct link_map *map, + return; + else + { +- struct link_map *sym_map; ++ struct link_map *sym_map = RESOLVE_MAP (map, scope, &sym, version, ++ r_type); + + /* RESOLVE_MAP() will return NULL if it fail to locate the symbol. */ +- if ((sym_map = RESOLVE_MAP (&sym, version, r_type))) ++ if (sym_map != NULL) + { + value = SYMBOL_ADDRESS (sym_map, sym, true) + reloc->r_addend; + +@@ -476,7 +478,7 @@ elf_machine_rela (struct link_map *map, + can be skipped. */ + #define ELF_MACHINE_REL_RELATIVE 1 + +-auto inline void ++static inline void + __attribute ((always_inline)) + elf_machine_rela_relative (Elf64_Addr l_addr, const Elf64_Rela *reloc, + void *const reloc_addr_arg) +@@ -489,9 +491,9 @@ elf_machine_rela_relative (Elf64_Addr l_addr, const Elf64_Rela *reloc, + } + + /* Perform a RELATIVE reloc on the .got entry that transfers to the .plt. 
*/ +-auto inline void ++static inline void + __attribute ((always_inline)) +-elf_machine_lazy_rel (struct link_map *map, ++elf_machine_lazy_rel (struct link_map *map, struct r_scope_elem *scope[], + Elf64_Addr l_addr, const Elf64_Rela *reloc, + int skip_ifunc) + { +diff --git a/sysdeps/m68k/dl-machine.h b/sysdeps/m68k/dl-machine.h +index 86a8c67e2a1b9f77..5e34c4784e348b19 100644 +--- a/sysdeps/m68k/dl-machine.h ++++ b/sysdeps/m68k/dl-machine.h +@@ -68,7 +68,8 @@ elf_machine_load_address (void) + entries will jump to the on-demand fixup code in dl-runtime.c. */ + + static inline int __attribute__ ((always_inline)) +-elf_machine_runtime_setup (struct link_map *l, int lazy, int profile) ++elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[], ++ int lazy, int profile) + { + Elf32_Addr *got; + extern void _dl_runtime_resolve (Elf32_Word); +@@ -215,9 +216,10 @@ elf_machine_plt_value (struct link_map *map, const Elf32_Rela *reloc, + /* Perform the relocation specified by RELOC and SYM (which is fully resolved). + MAP is the object containing the reloc. */ + +-auto inline void __attribute__ ((unused, always_inline)) +-elf_machine_rela (struct link_map *map, const Elf32_Rela *reloc, +- const Elf32_Sym *sym, const struct r_found_version *version, ++static inline void __attribute__ ((unused, always_inline)) ++elf_machine_rela (struct link_map *map, struct r_scope_elem *scope[], ++ const Elf32_Rela *reloc, const Elf32_Sym *sym, ++ const struct r_found_version *version, + void *const reloc_addr_arg, int skip_ifunc) + { + Elf32_Addr *const reloc_addr = reloc_addr_arg; +@@ -228,7 +230,8 @@ elf_machine_rela (struct link_map *map, const Elf32_Rela *reloc, + else + { + const Elf32_Sym *const refsym = sym; +- struct link_map *sym_map = RESOLVE_MAP (&sym, version, r_type); ++ struct link_map *sym_map = RESOLVE_MAP (map, scope, &sym, version, ++ r_type); + Elf32_Addr value = SYMBOL_ADDRESS (sym_map, sym, true); + + switch (r_type) +@@ -303,7 +306,7 @@ elf_machine_rela (struct link_map *map, const Elf32_Rela *reloc, + } + } + +-auto inline void __attribute__ ((unused, always_inline)) ++static inline void __attribute__ ((unused, always_inline)) + elf_machine_rela_relative (Elf32_Addr l_addr, const Elf32_Rela *reloc, + void *const reloc_addr_arg) + { +@@ -311,8 +314,8 @@ elf_machine_rela_relative (Elf32_Addr l_addr, const Elf32_Rela *reloc, + *reloc_addr = l_addr + reloc->r_addend; + } + +-auto inline void __attribute__ ((unused, always_inline)) +-elf_machine_lazy_rel (struct link_map *map, ++static inline void __attribute__ ((unused, always_inline)) ++elf_machine_lazy_rel (struct link_map *map, struct r_scope_elem *scope[], + Elf32_Addr l_addr, const Elf32_Rela *reloc, + int skip_ifunc) + { +diff --git a/sysdeps/microblaze/dl-machine.h b/sysdeps/microblaze/dl-machine.h +index e460f6f195561da1..3fd4988e6093be1c 100644 +--- a/sysdeps/microblaze/dl-machine.h ++++ b/sysdeps/microblaze/dl-machine.h +@@ -69,7 +69,8 @@ elf_machine_load_address (void) + entries will jump to the on-demand fixup code in dl-runtime.c. 
*/ + + static inline int __attribute__ ((always_inline)) +-elf_machine_runtime_setup (struct link_map *l, int lazy, int profile) ++elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[], ++ int lazy, int profile) + { + extern void _dl_runtime_resolve (Elf32_Word); + extern void _dl_runtime_profile (Elf32_Word); +@@ -207,9 +208,10 @@ elf_machine_plt_value (struct link_map *map, const Elf32_Rela *reloc, + ((unsigned short *)(rel_addr))[3] = (val) & 0xffff; \ + } while (0) + +-auto inline void __attribute__ ((always_inline)) +-elf_machine_rela (struct link_map *map, const Elf32_Rela *reloc, +- const Elf32_Sym *sym, const struct r_found_version *version, ++static inline void __attribute__ ((always_inline)) ++elf_machine_rela (struct link_map *map, struct r_scope_elem *scope[], ++ const Elf32_Rela *reloc, const Elf32_Sym *sym, ++ const struct r_found_version *version, + void *const reloc_addr_arg, int skip_ifunc) + { + Elf32_Addr *const reloc_addr = reloc_addr_arg; +@@ -222,7 +224,8 @@ elf_machine_rela (struct link_map *map, const Elf32_Rela *reloc, + else + { + const Elf32_Sym *const refsym = sym; +- struct link_map *sym_map = RESOLVE_MAP (&sym, version, r_type); ++ struct link_map *sym_map = RESOLVE_MAP (map, scope, &sym, version, ++ r_type); + Elf32_Addr value = SYMBOL_ADDRESS (sym_map, sym, true); + + value += reloc->r_addend; +@@ -277,7 +280,7 @@ elf_machine_rela (struct link_map *map, const Elf32_Rela *reloc, + } + } + +-auto inline void ++static inline void + elf_machine_rela_relative (Elf32_Addr l_addr, const Elf32_Rela *reloc, + void *const reloc_addr_arg) + { +@@ -285,8 +288,8 @@ elf_machine_rela_relative (Elf32_Addr l_addr, const Elf32_Rela *reloc, + PUT_REL_64 (reloc_addr, l_addr + reloc->r_addend); + } + +-auto inline void +-elf_machine_lazy_rel (struct link_map *map, ++static inline void ++elf_machine_lazy_rel (struct link_map *map, struct r_scope_elem *scope[], + Elf32_Addr l_addr, const Elf32_Rela *reloc, + int skip_ifunc) + { +diff --git a/sysdeps/mips/dl-machine.h b/sysdeps/mips/dl-machine.h +index d9c6d33d0cbf1f50..7a821ceb8e518cef 100644 +--- a/sysdeps/mips/dl-machine.h ++++ b/sysdeps/mips/dl-machine.h +@@ -188,9 +188,9 @@ elf_machine_load_address (void) + + /* We can't rely on elf_machine_got_rel because _dl_object_relocation_scope + fiddles with global data. */ +-#define ELF_MACHINE_BEFORE_RTLD_RELOC(dynamic_info) \ ++#define ELF_MACHINE_BEFORE_RTLD_RELOC(bootstrap_map, dynamic_info) \ + do { \ +- struct link_map *map = BOOTSTRAP_MAP; \ ++ struct link_map *map = bootstrap_map; \ + ElfW(Sym) *sym; \ + ElfW(Addr) *got; \ + int i, n; \ +@@ -475,11 +475,12 @@ elf_machine_plt_value (struct link_map *map, const ElfW(Rel) *reloc, + by RELOC_ADDR. SYM is the relocation symbol specified by R_INFO and + MAP is the object containing the reloc. 
*/ + +-auto inline void ++static inline void + __attribute__ ((always_inline)) +-elf_machine_reloc (struct link_map *map, ElfW(Addr) r_info, +- const ElfW(Sym) *sym, const struct r_found_version *version, +- void *reloc_addr, ElfW(Addr) r_addend, int inplace_p) ++elf_machine_reloc (struct link_map *map, struct r_scope_elem *scope[], ++ ElfW(Addr) r_info, const ElfW(Sym) *sym, ++ const struct r_found_version *version, void *reloc_addr, ++ ElfW(Addr) r_addend, int inplace_p) + { + const unsigned long int r_type = ELFW(R_TYPE) (r_info); + ElfW(Addr) *addr_field = (ElfW(Addr) *) reloc_addr; +@@ -507,7 +508,8 @@ elf_machine_reloc (struct link_map *map, ElfW(Addr) r_info, + case R_MIPS_TLS_TPREL32: + # endif + { +- struct link_map *sym_map = RESOLVE_MAP (&sym, version, r_type); ++ struct link_map *sym_map = RESOLVE_MAP (map, scope, &sym, version, ++ r_type); + + switch (r_type) + { +@@ -647,7 +649,7 @@ elf_machine_reloc (struct link_map *map, ElfW(Addr) r_info, + _dl_signal_error (0, map->l_name, NULL, + "found jump slot relocation with non-zero addend"); + +- sym_map = RESOLVE_MAP (&sym, version, r_type); ++ sym_map = RESOLVE_MAP (map, scope, &sym, version, r_type); + value = SYMBOL_ADDRESS (sym_map, sym, true); + *addr_field = value; + +@@ -661,7 +663,7 @@ elf_machine_reloc (struct link_map *map, ElfW(Addr) r_info, + ElfW(Addr) value; + + /* Calculate the address of the symbol. */ +- sym_map = RESOLVE_MAP (&sym, version, r_type); ++ sym_map = RESOLVE_MAP (map, scope, &sym, version, r_type); + value = SYMBOL_ADDRESS (sym_map, sym, true); + + if (__builtin_expect (sym == NULL, 0)) +@@ -708,16 +710,17 @@ elf_machine_reloc (struct link_map *map, ElfW(Addr) r_info, + /* Perform the relocation specified by RELOC and SYM (which is fully resolved). + MAP is the object containing the reloc. */ + +-auto inline void ++static inline void + __attribute__ ((always_inline)) +-elf_machine_rel (struct link_map *map, const ElfW(Rel) *reloc, +- const ElfW(Sym) *sym, const struct r_found_version *version, +- void *const reloc_addr, int skip_ifunc) ++elf_machine_rel (struct link_map *map, struct r_scope_elem *scope[], ++ const ElfW(Rel) *reloc, const ElfW(Sym) *sym, ++ const struct r_found_version *version, void *const reloc_addr, ++ int skip_ifunc) + { +- elf_machine_reloc (map, reloc->r_info, sym, version, reloc_addr, 0, 1); ++ elf_machine_reloc (map, scope, reloc->r_info, sym, version, reloc_addr, 0, 1); + } + +-auto inline void ++static inline void + __attribute__((always_inline)) + elf_machine_rel_relative (ElfW(Addr) l_addr, const ElfW(Rel) *reloc, + void *const reloc_addr) +@@ -725,9 +728,9 @@ elf_machine_rel_relative (ElfW(Addr) l_addr, const ElfW(Rel) *reloc, + /* XXX Nothing to do. There is no relative relocation, right? 
*/ + } + +-auto inline void ++static inline void + __attribute__((always_inline)) +-elf_machine_lazy_rel (struct link_map *map, ++elf_machine_lazy_rel (struct link_map *map, struct r_scope_elem *scope[], + ElfW(Addr) l_addr, const ElfW(Rel) *reloc, + int skip_ifunc) + { +@@ -748,17 +751,17 @@ elf_machine_lazy_rel (struct link_map *map, + _dl_reloc_bad_type (map, r_type, 1); + } + +-auto inline void ++static inline void + __attribute__ ((always_inline)) +-elf_machine_rela (struct link_map *map, const ElfW(Rela) *reloc, ++elf_machine_rela (struct link_map *map, struct r_scope_elem *scope[], const ElfW(Rela) *reloc, + const ElfW(Sym) *sym, const struct r_found_version *version, + void *const reloc_addr, int skip_ifunc) + { +- elf_machine_reloc (map, reloc->r_info, sym, version, reloc_addr, ++ elf_machine_reloc (map, scope, reloc->r_info, sym, version, reloc_addr, + reloc->r_addend, 0); + } + +-auto inline void ++static inline void + __attribute__((always_inline)) + elf_machine_rela_relative (ElfW(Addr) l_addr, const ElfW(Rela) *reloc, + void *const reloc_addr) +@@ -767,9 +770,9 @@ elf_machine_rela_relative (ElfW(Addr) l_addr, const ElfW(Rela) *reloc, + + #ifndef RTLD_BOOTSTRAP + /* Relocate GOT. */ +-auto inline void ++static inline void + __attribute__((always_inline)) +-elf_machine_got_rel (struct link_map *map, int lazy) ++elf_machine_got_rel (struct link_map *map, struct r_scope_elem *scope[], int lazy) + { + ElfW(Addr) *got; + ElfW(Sym) *sym; +@@ -782,7 +785,7 @@ elf_machine_got_rel (struct link_map *map, int lazy) + const struct r_found_version *version __attribute__ ((unused)) \ + = vernum ? &map->l_versions[vernum[sym_index] & 0x7fff] : NULL; \ + struct link_map *sym_map; \ +- sym_map = RESOLVE_MAP (&ref, version, reloc); \ ++ sym_map = RESOLVE_MAP (map, scope, &ref, version, reloc); \ + SYMBOL_ADDRESS (sym_map, ref, true); \ + }) + +@@ -868,9 +871,10 @@ elf_machine_got_rel (struct link_map *map, int lazy) + /* Set up the loaded object described by L so its stub function + will jump to the on-demand fixup code __dl_runtime_resolve. */ + +-auto inline int ++static inline int + __attribute__((always_inline)) +-elf_machine_runtime_setup (struct link_map *l, int lazy, int profile) ++elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[], ++ int lazy, int profile) + { + # ifndef RTLD_BOOTSTRAP + ElfW(Addr) *got; +@@ -900,7 +904,7 @@ elf_machine_runtime_setup (struct link_map *l, int lazy, int profile) + } + + /* Relocate global offset table. */ +- elf_machine_got_rel (l, lazy); ++ elf_machine_got_rel (l, scope, lazy); + + /* If using PLTs, fill in the first two entries of .got.plt. */ + if (l->l_info[DT_JMPREL] && lazy) +diff --git a/sysdeps/nios2/dl-machine.h b/sysdeps/nios2/dl-machine.h +index e000cd081f18a12b..4de602b13d5500f6 100644 +--- a/sysdeps/nios2/dl-machine.h ++++ b/sysdeps/nios2/dl-machine.h +@@ -67,7 +67,8 @@ elf_machine_load_address (void) + entries will jump to the on-demand fixup code in dl-runtime.c. */ + + static inline int __attribute__ ((always_inline)) +-elf_machine_runtime_setup (struct link_map *l, int lazy, int profile) ++elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[], ++ int lazy, int profile) + { + extern void _dl_runtime_resolve (Elf32_Word); + +@@ -234,10 +235,11 @@ elf_machine_plt_value (struct link_map *map, const Elf32_Rela *reloc, + LOADADDR is the load address of the object; INFO is an array indexed + by DT_* of the .dynamic section info. 
*/ + +-auto inline void __attribute__ ((always_inline)) +-elf_machine_rela (struct link_map *map, const ElfW(Rela) *reloc, +- const ElfW(Sym) *sym, const struct r_found_version *version, +- void *const reloc_addr_arg, int skip_ifunc) ++static inline void __attribute__ ((always_inline)) ++elf_machine_rela (struct link_map *map, struct r_scope_elem *scope[], ++ const ElfW(Rela) *reloc, const ElfW(Sym) *sym, ++ const struct r_found_version *version, ++ void *const reloc_addr_arg, int skip_ifunc) + { + Elf32_Addr *const reloc_addr = reloc_addr_arg; + const unsigned int r_type = ELF32_R_TYPE (reloc->r_info); +@@ -249,7 +251,8 @@ elf_machine_rela (struct link_map *map, const ElfW(Rela) *reloc, + else + { + const Elf32_Sym *const refsym = sym; +- struct link_map *sym_map = RESOLVE_MAP (&sym, version, r_type); ++ struct link_map *sym_map = RESOLVE_MAP (map, scope, &sym, version, ++ r_type); + Elf32_Addr value = SYMBOL_ADDRESS (sym_map, sym, true); + + switch (r_type) +@@ -314,7 +317,7 @@ elf_machine_rela (struct link_map *map, const ElfW(Rela) *reloc, + } + } + +-auto inline void __attribute__((always_inline)) ++static inline void __attribute__((always_inline)) + elf_machine_rela_relative (ElfW(Addr) l_addr, const ElfW(Rela) *reloc, + void *const reloc_addr_arg) + { +@@ -322,8 +325,8 @@ elf_machine_rela_relative (ElfW(Addr) l_addr, const ElfW(Rela) *reloc, + *reloc_addr = l_addr + reloc->r_addend; + } + +-auto inline void __attribute__((always_inline)) +-elf_machine_lazy_rel (struct link_map *map, ++static inline void __attribute__((always_inline)) ++elf_machine_lazy_rel (struct link_map *map, struct r_scope_elem *scope[], + ElfW(Addr) l_addr, const ElfW(Rela) *reloc, + int skip_ifunc) + { +diff --git a/sysdeps/powerpc/powerpc32/dl-machine.h b/sysdeps/powerpc/powerpc32/dl-machine.h +index b93cf486b6cda5fd..cda012dc1b822254 100644 +--- a/sysdeps/powerpc/powerpc32/dl-machine.h ++++ b/sysdeps/powerpc/powerpc32/dl-machine.h +@@ -170,7 +170,7 @@ extern int __elf_machine_runtime_setup (struct link_map *map, + int lazy, int profile); + + static inline int +-elf_machine_runtime_setup (struct link_map *map, ++elf_machine_runtime_setup (struct link_map *map, struct r_scope_elem *scope[], + int lazy, int profile) + { + if (map->l_info[DT_JMPREL] == 0) +@@ -284,9 +284,10 @@ extern void _dl_reloc_overflow (struct link_map *map, + LOADADDR is the load address of the object; INFO is an array indexed + by DT_* of the .dynamic section info. 
*/ + +-auto inline void __attribute__ ((always_inline)) +-elf_machine_rela (struct link_map *map, const Elf32_Rela *reloc, +- const Elf32_Sym *sym, const struct r_found_version *version, ++static inline void __attribute__ ((always_inline)) ++elf_machine_rela (struct link_map *map, struct r_scope_elem *scope[], ++ const Elf32_Rela *reloc, const Elf32_Sym *sym, ++ const struct r_found_version *version, + void *const reloc_addr_arg, int skip_ifunc) + { + Elf32_Addr *const reloc_addr = reloc_addr_arg; +@@ -315,7 +316,7 @@ elf_machine_rela (struct link_map *map, const Elf32_Rela *reloc, + } + else + { +- sym_map = RESOLVE_MAP (&sym, version, r_type); ++ sym_map = RESOLVE_MAP (map, scope, &sym, version, r_type); + value = SYMBOL_ADDRESS (sym_map, sym, true); + } + value += reloc->r_addend; +@@ -439,7 +440,7 @@ elf_machine_rela (struct link_map *map, const Elf32_Rela *reloc, + } + } + +-auto inline void __attribute__ ((always_inline)) ++static inline void __attribute__ ((always_inline)) + elf_machine_rela_relative (Elf32_Addr l_addr, const Elf32_Rela *reloc, + void *const reloc_addr_arg) + { +@@ -447,8 +448,8 @@ elf_machine_rela_relative (Elf32_Addr l_addr, const Elf32_Rela *reloc, + *reloc_addr = l_addr + reloc->r_addend; + } + +-auto inline void __attribute__ ((always_inline)) +-elf_machine_lazy_rel (struct link_map *map, ++static inline void __attribute__ ((always_inline)) ++elf_machine_lazy_rel (struct link_map *map, struct r_scope_elem *scope[], + Elf32_Addr l_addr, const Elf32_Rela *reloc, + int skip_ifunc) + { +diff --git a/sysdeps/powerpc/powerpc64/dl-machine.h b/sysdeps/powerpc/powerpc64/dl-machine.h +index b3f3352bcf5a52b0..3f92fbb369eb5023 100644 +--- a/sysdeps/powerpc/powerpc64/dl-machine.h ++++ b/sysdeps/powerpc/powerpc64/dl-machine.h +@@ -343,7 +343,8 @@ dl_platform_init (void) + /* Set up the loaded object described by MAP so its unrelocated PLT + entries will jump to the on-demand fixup code in dl-runtime.c. */ + static inline int __attribute__ ((always_inline)) +-elf_machine_runtime_setup (struct link_map *map, int lazy, int profile) ++elf_machine_runtime_setup (struct link_map *map, struct r_scope_elem *scope[], ++ int lazy, int profile) + { + if (map->l_info[DT_JMPREL]) + { +@@ -618,7 +619,7 @@ extern void attribute_hidden _dl_reloc_overflow (struct link_map *map, + Elf64_Addr *const reloc_addr, + const Elf64_Sym *refsym); + +-auto inline void __attribute__ ((always_inline)) ++static inline void __attribute__ ((always_inline)) + elf_machine_rela_relative (Elf64_Addr l_addr, const Elf64_Rela *reloc, + void *const reloc_addr_arg) + { +@@ -627,7 +628,7 @@ elf_machine_rela_relative (Elf64_Addr l_addr, const Elf64_Rela *reloc, + } + + /* This computes the value used by TPREL* relocs. */ +-auto inline Elf64_Addr __attribute__ ((always_inline, const)) ++static inline Elf64_Addr __attribute__ ((always_inline, const)) + elf_machine_tprel (struct link_map *map, + struct link_map *sym_map, + const Elf64_Sym *sym, +@@ -646,7 +647,7 @@ elf_machine_tprel (struct link_map *map, + } + + /* Call function at address VALUE (an OPD entry) to resolve ifunc relocs. */ +-auto inline Elf64_Addr __attribute__ ((always_inline)) ++static inline Elf64_Addr __attribute__ ((always_inline)) + resolve_ifunc (Elf64_Addr value, + const struct link_map *map, const struct link_map *sym_map) + { +@@ -676,8 +677,8 @@ resolve_ifunc (Elf64_Addr value, + + /* Perform the relocation specified by RELOC and SYM (which is fully + resolved). MAP is the object containing the reloc. 
*/ +-auto inline void __attribute__ ((always_inline)) +-elf_machine_rela (struct link_map *map, ++static inline void __attribute__ ((always_inline)) ++elf_machine_rela (struct link_map *map, struct r_scope_elem *scope[], + const Elf64_Rela *reloc, + const Elf64_Sym *sym, + const struct r_found_version *version, +@@ -705,7 +706,7 @@ elf_machine_rela (struct link_map *map, + + /* We need SYM_MAP even in the absence of TLS, for elf_machine_fixup_plt + and STT_GNU_IFUNC. */ +- struct link_map *sym_map = RESOLVE_MAP (&sym, version, r_type); ++ struct link_map *sym_map = RESOLVE_MAP (map, scope, &sym, version, r_type); + Elf64_Addr value = SYMBOL_ADDRESS (sym_map, sym, true) + reloc->r_addend; + + if (sym != NULL +@@ -1035,8 +1036,8 @@ elf_machine_rela (struct link_map *map, + MODIFIED_CODE_NOQUEUE (reloc_addr); + } + +-auto inline void __attribute__ ((always_inline)) +-elf_machine_lazy_rel (struct link_map *map, ++static inline void __attribute__ ((always_inline)) ++elf_machine_lazy_rel (struct link_map *map, struct r_scope_elem *scope[], + Elf64_Addr l_addr, const Elf64_Rela *reloc, + int skip_ifunc) + { +diff --git a/sysdeps/riscv/dl-machine.h b/sysdeps/riscv/dl-machine.h +index 951268923da26a37..343c0feb6b437001 100644 +--- a/sysdeps/riscv/dl-machine.h ++++ b/sysdeps/riscv/dl-machine.h +@@ -168,17 +168,18 @@ elf_machine_fixup_plt (struct link_map *map, lookup_t t, + by RELOC_ADDR. SYM is the relocation symbol specified by R_INFO and + MAP is the object containing the reloc. */ + +-auto inline void ++static inline void + __attribute__ ((always_inline)) +-elf_machine_rela (struct link_map *map, const ElfW(Rela) *reloc, +- const ElfW(Sym) *sym, const struct r_found_version *version, ++elf_machine_rela (struct link_map *map, struct r_scope_elem *scope[], ++ const ElfW(Rela) *reloc, const ElfW(Sym) *sym, ++ const struct r_found_version *version, + void *const reloc_addr, int skip_ifunc) + { + ElfW(Addr) r_info = reloc->r_info; + const unsigned long int r_type = ELFW (R_TYPE) (r_info); + ElfW(Addr) *addr_field = (ElfW(Addr) *) reloc_addr; + const ElfW(Sym) *const __attribute__ ((unused)) refsym = sym; +- struct link_map *sym_map = RESOLVE_MAP (&sym, version, r_type); ++ struct link_map *sym_map = RESOLVE_MAP (map, scope, &sym, version, r_type); + ElfW(Addr) value = 0; + if (sym_map != NULL) + value = SYMBOL_ADDRESS (sym_map, sym, true) + reloc->r_addend; +@@ -286,7 +287,7 @@ elf_machine_rela (struct link_map *map, const ElfW(Rela) *reloc, + } + } + +-auto inline void ++static inline void + __attribute__ ((always_inline)) + elf_machine_rela_relative (ElfW(Addr) l_addr, const ElfW(Rela) *reloc, + void *const reloc_addr) +@@ -294,10 +295,11 @@ elf_machine_rela_relative (ElfW(Addr) l_addr, const ElfW(Rela) *reloc, + *(ElfW(Addr) *) reloc_addr = l_addr + reloc->r_addend; + } + +-auto inline void ++static inline void + __attribute__ ((always_inline)) +-elf_machine_lazy_rel (struct link_map *map, ElfW(Addr) l_addr, +- const ElfW(Rela) *reloc, int skip_ifunc) ++elf_machine_lazy_rel (struct link_map *map, struct r_scope_elem *scope[], ++ ElfW(Addr) l_addr, const ElfW(Rela) *reloc, ++ int skip_ifunc) + { + ElfW(Addr) *const reloc_addr = (void *) (l_addr + reloc->r_offset); + const unsigned int r_type = ELFW (R_TYPE) (reloc->r_info); +@@ -327,9 +329,10 @@ elf_machine_lazy_rel (struct link_map *map, ElfW(Addr) l_addr, + /* Set up the loaded object described by L so its stub function + will jump to the on-demand fixup code __dl_runtime_resolve. 
*/ + +-auto inline int ++static inline int + __attribute__ ((always_inline)) +-elf_machine_runtime_setup (struct link_map *l, int lazy, int profile) ++elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[], ++ int lazy, int profile) + { + #ifndef RTLD_BOOTSTRAP + /* If using PLTs, fill in the first two entries of .got.plt. */ +diff --git a/sysdeps/s390/s390-32/dl-machine.h b/sysdeps/s390/s390-32/dl-machine.h +index d0ccd69261c8f55b..96a5e80c846c816a 100644 +--- a/sysdeps/s390/s390-32/dl-machine.h ++++ b/sysdeps/s390/s390-32/dl-machine.h +@@ -85,7 +85,8 @@ elf_machine_load_address (void) + entries will jump to the on-demand fixup code in dl-runtime.c. */ + + static inline int __attribute__ ((unused)) +-elf_machine_runtime_setup (struct link_map *l, int lazy, int profile) ++elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[], ++ int lazy, int profile) + { + extern void _dl_runtime_resolve (Elf32_Word); + extern void _dl_runtime_profile (Elf32_Word); +@@ -321,10 +322,11 @@ elf_machine_plt_value (struct link_map *map, const Elf32_Rela *reloc, + /* Perform the relocation specified by RELOC and SYM (which is fully resolved). + MAP is the object containing the reloc. */ + +-auto inline void ++static inline void + __attribute__ ((always_inline)) +-elf_machine_rela (struct link_map *map, const Elf32_Rela *reloc, +- const Elf32_Sym *sym, const struct r_found_version *version, ++elf_machine_rela (struct link_map *map, struct r_scope_elem *scope[], ++ const Elf32_Rela *reloc, const Elf32_Sym *sym, ++ const struct r_found_version *version, + void *const reloc_addr_arg, int skip_ifunc) + { + Elf32_Addr *const reloc_addr = reloc_addr_arg; +@@ -357,7 +359,8 @@ elf_machine_rela (struct link_map *map, const Elf32_Rela *reloc, + /* Only needed for R_390_COPY below. */ + const Elf32_Sym *const refsym = sym; + #endif +- struct link_map *sym_map = RESOLVE_MAP (&sym, version, r_type); ++ struct link_map *sym_map = RESOLVE_MAP (map, scope, &sym, version, ++ r_type); + Elf32_Addr value = SYMBOL_ADDRESS (sym_map, sym, true); + + if (sym != NULL +@@ -484,7 +487,7 @@ elf_machine_rela (struct link_map *map, const Elf32_Rela *reloc, + } + } + +-auto inline void ++static inline void + __attribute__ ((always_inline)) + elf_machine_rela_relative (Elf32_Addr l_addr, const Elf32_Rela *reloc, + void *const reloc_addr_arg) +@@ -493,9 +496,9 @@ elf_machine_rela_relative (Elf32_Addr l_addr, const Elf32_Rela *reloc, + *reloc_addr = l_addr + reloc->r_addend; + } + +-auto inline void ++static inline void + __attribute__ ((always_inline)) +-elf_machine_lazy_rel (struct link_map *map, ++elf_machine_lazy_rel (struct link_map *map, struct r_scope_elem *scope[], + Elf32_Addr l_addr, const Elf32_Rela *reloc, + int skip_ifunc) + { +diff --git a/sysdeps/s390/s390-64/dl-machine.h b/sysdeps/s390/s390-64/dl-machine.h +index 543361c83637c071..c94d09b9c8512738 100644 +--- a/sysdeps/s390/s390-64/dl-machine.h ++++ b/sysdeps/s390/s390-64/dl-machine.h +@@ -75,7 +75,8 @@ elf_machine_load_address (void) + entries will jump to the on-demand fixup code in dl-runtime.c. 
*/ + + static inline int __attribute__ ((unused)) +-elf_machine_runtime_setup (struct link_map *l, int lazy, int profile) ++elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[], ++ int lazy, int profile) + { + extern void _dl_runtime_resolve (Elf64_Word); + extern void _dl_runtime_profile (Elf64_Word); +@@ -268,10 +269,11 @@ elf_machine_plt_value (struct link_map *map, const Elf64_Rela *reloc, + /* Perform the relocation specified by RELOC and SYM (which is fully resolved). + MAP is the object containing the reloc. */ + +-auto inline void ++static inline void + __attribute__ ((always_inline)) +-elf_machine_rela (struct link_map *map, const Elf64_Rela *reloc, +- const Elf64_Sym *sym, const struct r_found_version *version, ++elf_machine_rela (struct link_map *map, struct r_scope_elem *scope[], ++ const Elf64_Rela *reloc, const Elf64_Sym *sym, ++ const struct r_found_version *version, + void *const reloc_addr_arg, int skip_ifunc) + { + Elf64_Addr *const reloc_addr = reloc_addr_arg; +@@ -304,7 +306,8 @@ elf_machine_rela (struct link_map *map, const Elf64_Rela *reloc, + /* Only needed for R_390_COPY below. */ + const Elf64_Sym *const refsym = sym; + #endif +- struct link_map *sym_map = RESOLVE_MAP (&sym, version, r_type); ++ struct link_map *sym_map = RESOLVE_MAP (map, scope, &sym, version, ++ r_type); + Elf64_Addr value = SYMBOL_ADDRESS (sym_map, sym, true); + + if (sym != NULL +@@ -438,7 +441,7 @@ elf_machine_rela (struct link_map *map, const Elf64_Rela *reloc, + } + } + +-auto inline void ++static inline void + __attribute__ ((always_inline)) + elf_machine_rela_relative (Elf64_Addr l_addr, const Elf64_Rela *reloc, + void *const reloc_addr_arg) +@@ -447,9 +450,9 @@ elf_machine_rela_relative (Elf64_Addr l_addr, const Elf64_Rela *reloc, + *reloc_addr = l_addr + reloc->r_addend; + } + +-auto inline void ++static inline void + __attribute__ ((always_inline)) +-elf_machine_lazy_rel (struct link_map *map, ++elf_machine_lazy_rel (struct link_map *map, struct r_scope_elem *scope[], + Elf64_Addr l_addr, const Elf64_Rela *reloc, + int skip_ifunc) + { +diff --git a/sysdeps/sh/dl-machine.h b/sysdeps/sh/dl-machine.h +index 122b417a17e2ef9b..0c22dfd8487a516e 100644 +--- a/sysdeps/sh/dl-machine.h ++++ b/sysdeps/sh/dl-machine.h +@@ -69,7 +69,8 @@ elf_machine_load_address (void) + entries will jump to the on-demand fixup code in dl-runtime.c. */ + + static inline int __attribute__ ((unused, always_inline)) +-elf_machine_runtime_setup (struct link_map *l, int lazy, int profile) ++elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[], ++ int lazy, int profile) + { + Elf32_Addr *got; + extern void _dl_runtime_resolve (Elf32_Word); +@@ -259,10 +260,11 @@ elf_machine_plt_value (struct link_map *map, const Elf32_Rela *reloc, + /* Perform the relocation specified by RELOC and SYM (which is fully resolved). + MAP is the object containing the reloc. 
*/ + +-auto inline void ++static inline void + __attribute ((always_inline)) +-elf_machine_rela (struct link_map *map, const Elf32_Rela *reloc, +- const Elf32_Sym *sym, const struct r_found_version *version, ++elf_machine_rela (struct link_map *map, struct r_scope_elem *scope[], ++ const Elf32_Rela *reloc, const Elf32_Sym *sym, ++ const struct r_found_version *version, + void *const reloc_addr_arg, int skip_ifunc) + { + Elf32_Addr *const reloc_addr = reloc_addr_arg; +@@ -318,7 +320,8 @@ elf_machine_rela (struct link_map *map, const Elf32_Rela *reloc, + else + { + const Elf32_Sym *const refsym = sym; +- struct link_map *sym_map = RESOLVE_MAP (&sym, version, r_type); ++ struct link_map *sym_map = RESOLVE_MAP (map, scope, &sym, version, ++ r_type); + + value = SYMBOL_ADDRESS (sym_map, sym, true); + value += reloc->r_addend; +@@ -424,7 +427,7 @@ elf_machine_rela (struct link_map *map, const Elf32_Rela *reloc, + } + } + +-auto inline void ++static inline void + __attribute__ ((always_inline)) + elf_machine_rela_relative (Elf32_Addr l_addr, const Elf32_Rela *reloc, + void *const reloc_addr_arg) +@@ -443,9 +446,9 @@ elf_machine_rela_relative (Elf32_Addr l_addr, const Elf32_Rela *reloc, + #undef COPY_UNALIGNED_WORD + } + +-auto inline void ++static inline void + __attribute__ ((always_inline)) +-elf_machine_lazy_rel (struct link_map *map, ++elf_machine_lazy_rel (struct link_map *map, struct r_scope_elem *scope[], + Elf32_Addr l_addr, const Elf32_Rela *reloc, + int skip_ifunc) + { +diff --git a/sysdeps/sparc/sparc32/dl-machine.h b/sysdeps/sparc/sparc32/dl-machine.h +index 0269e458ea2b3bca..6361cfae9eb8fa58 100644 +--- a/sysdeps/sparc/sparc32/dl-machine.h ++++ b/sysdeps/sparc/sparc32/dl-machine.h +@@ -97,7 +97,8 @@ elf_machine_load_address (void) + entries will jump to the on-demand fixup code in dl-runtime.c. */ + + static inline int +-elf_machine_runtime_setup (struct link_map *l, int lazy, int profile) ++elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[], ++ int lazy, int profile) + { + Elf32_Addr *plt; + extern void _dl_runtime_resolve (Elf32_Word); +@@ -327,10 +328,11 @@ elf_machine_plt_value (struct link_map *map, const Elf32_Rela *reloc, + /* Perform the relocation specified by RELOC and SYM (which is fully resolved). + MAP is the object containing the reloc. 
*/ + +-auto inline void ++static inline void + __attribute__ ((always_inline)) +-elf_machine_rela (struct link_map *map, const Elf32_Rela *reloc, +- const Elf32_Sym *sym, const struct r_found_version *version, ++elf_machine_rela (struct link_map *map, struct r_scope_elem *scope[], ++ const Elf32_Rela *reloc, const Elf32_Sym *sym, ++ const struct r_found_version *version, + void *const reloc_addr_arg, int skip_ifunc) + { + Elf32_Addr *const reloc_addr = reloc_addr_arg; +@@ -381,7 +383,7 @@ elf_machine_rela (struct link_map *map, const Elf32_Rela *reloc, + } + else + { +- sym_map = RESOLVE_MAP (&sym, version, r_type); ++ sym_map = RESOLVE_MAP (map, scope, &sym, version, r_type); + value = SYMBOL_ADDRESS (sym_map, sym, true); + } + #else +@@ -536,7 +538,7 @@ elf_machine_rela (struct link_map *map, const Elf32_Rela *reloc, + } + } + +-auto inline void ++static inline void + __attribute__ ((always_inline)) + elf_machine_rela_relative (Elf32_Addr l_addr, const Elf32_Rela *reloc, + void *const reloc_addr_arg) +@@ -545,9 +547,9 @@ elf_machine_rela_relative (Elf32_Addr l_addr, const Elf32_Rela *reloc, + *reloc_addr += l_addr + reloc->r_addend; + } + +-auto inline void ++static inline void + __attribute__ ((always_inline)) +-elf_machine_lazy_rel (struct link_map *map, ++elf_machine_lazy_rel (struct link_map *map, struct r_scope_elem *scope[], + Elf32_Addr l_addr, const Elf32_Rela *reloc, + int skip_ifunc) + { +diff --git a/sysdeps/sparc/sparc64/dl-machine.h b/sysdeps/sparc/sparc64/dl-machine.h +index bbd4566d8a595f93..3fd18c6e5ef21e38 100644 +--- a/sysdeps/sparc/sparc64/dl-machine.h ++++ b/sysdeps/sparc/sparc64/dl-machine.h +@@ -126,7 +126,8 @@ elf_machine_plt_value (struct link_map *map, const Elf64_Rela *reloc, + entries will jump to the on-demand fixup code in dl-runtime.c. */ + + static inline int +-elf_machine_runtime_setup (struct link_map *l, int lazy, int profile) ++elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[], ++ int lazy, int profile) + { + if (l->l_info[DT_JMPREL] && lazy) + { +@@ -354,10 +355,11 @@ elf_machine_runtime_setup (struct link_map *l, int lazy, int profile) + /* Perform the relocation specified by RELOC and SYM (which is fully resolved). + MAP is the object containing the reloc. 
*/ + +-auto inline void ++static inline void + __attribute__ ((always_inline)) +-elf_machine_rela (struct link_map *map, const Elf64_Rela *reloc, +- const Elf64_Sym *sym, const struct r_found_version *version, ++elf_machine_rela (struct link_map *map, struct r_scope_elem *scope[], ++ const Elf64_Rela *reloc, const Elf64_Sym *sym, ++ const struct r_found_version *version, + void *const reloc_addr_arg, int skip_ifunc) + { + Elf64_Addr *const reloc_addr = reloc_addr_arg; +@@ -408,7 +410,7 @@ elf_machine_rela (struct link_map *map, const Elf64_Rela *reloc, + } + else + { +- sym_map = RESOLVE_MAP (&sym, version, r_type); ++ sym_map = RESOLVE_MAP (map, scope, &sym, version, r_type); + value = SYMBOL_ADDRESS (sym_map, sym, true); + } + #else +@@ -646,7 +648,7 @@ elf_machine_rela (struct link_map *map, const Elf64_Rela *reloc, + } + } + +-auto inline void ++static inline void + __attribute__ ((always_inline)) + elf_machine_rela_relative (Elf64_Addr l_addr, const Elf64_Rela *reloc, + void *const reloc_addr_arg) +@@ -655,9 +657,9 @@ elf_machine_rela_relative (Elf64_Addr l_addr, const Elf64_Rela *reloc, + *reloc_addr = l_addr + reloc->r_addend; + } + +-auto inline void ++static inline void + __attribute__ ((always_inline)) +-elf_machine_lazy_rel (struct link_map *map, ++elf_machine_lazy_rel (struct link_map *map, struct r_scope_elem *scope[], + Elf64_Addr l_addr, const Elf64_Rela *reloc, + int skip_ifunc) + { +diff --git a/sysdeps/x86_64/dl-machine.h b/sysdeps/x86_64/dl-machine.h +index a8596aa3fa489eff..d3fcbb37bf1f4f7c 100644 +--- a/sysdeps/x86_64/dl-machine.h ++++ b/sysdeps/x86_64/dl-machine.h +@@ -62,7 +62,8 @@ elf_machine_load_address (void) + entries will jump to the on-demand fixup code in dl-runtime.c. */ + + static inline int __attribute__ ((unused, always_inline)) +-elf_machine_runtime_setup (struct link_map *l, int lazy, int profile) ++elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[], ++ int lazy, int profile) + { + Elf64_Addr *got; + extern void _dl_runtime_resolve_fxsave (ElfW(Word)) attribute_hidden; +@@ -258,12 +259,11 @@ elf_machine_plt_value (struct link_map *map, const ElfW(Rela) *reloc, + /* Perform the relocation specified by RELOC and SYM (which is fully resolved). + MAP is the object containing the reloc. 
*/
+
+-auto inline void
+-__attribute__ ((always_inline))
+-elf_machine_rela (struct link_map *map, const ElfW(Rela) *reloc,
+- const ElfW(Sym) *sym, const struct r_found_version *version,
+- void *const reloc_addr_arg, int skip_ifunc)
+-{
++static inline void __attribute__((always_inline))
++elf_machine_rela(struct link_map *map, struct r_scope_elem *scope[],
++ const ElfW(Rela) *reloc, const ElfW(Sym) *sym,
++ const struct r_found_version *version,
++ void *const reloc_addr_arg, int skip_ifunc) {
+ ElfW(Addr) *const reloc_addr = reloc_addr_arg;
+ const unsigned long int r_type = ELFW(R_TYPE) (reloc->r_info);
+
+@@ -300,7 +300,8 @@ elf_machine_rela (struct link_map *map, const ElfW(Rela) *reloc,
+ # ifndef RTLD_BOOTSTRAP
+ const ElfW(Sym) *const refsym = sym;
+ # endif
+- struct link_map *sym_map = RESOLVE_MAP (&sym, version, r_type);
++ struct link_map *sym_map = RESOLVE_MAP (map, scope, &sym, version,
++ r_type);
+ ElfW(Addr) value = SYMBOL_ADDRESS (sym_map, sym, true);
+
+ if (sym != NULL
+@@ -525,7 +526,7 @@ and creates an unsatisfiable circular dependency.\n",
+ }
+ }
+
+-auto inline void
++static inline void
+ __attribute ((always_inline))
+ elf_machine_rela_relative (ElfW(Addr) l_addr, const ElfW(Rela) *reloc,
+ void *const reloc_addr_arg)
+@@ -544,9 +545,9 @@ elf_machine_rela_relative (ElfW(Addr) l_addr, const ElfW(Rela) *reloc,
+ }
+ }
+
+-auto inline void
++static inline void
+ __attribute ((always_inline))
+-elf_machine_lazy_rel (struct link_map *map,
++elf_machine_lazy_rel (struct link_map *map, struct r_scope_elem *scope[],
+ ElfW(Addr) l_addr, const ElfW(Rela) *reloc,
+ int skip_ifunc)
+ {
+@@ -580,7 +581,7 @@ elf_machine_lazy_rel (struct link_map *map,
+
+ /* Always initialize TLS descriptors completely at load time, in
+ case static TLS is allocated for it that requires locking. */
+- elf_machine_rela (map, reloc, sym, version, reloc_addr, skip_ifunc);
++ elf_machine_rela (map, scope, reloc, sym, version, reloc_addr, skip_ifunc);
+ }
+ else if (__glibc_unlikely (r_type == R_X86_64_IRELATIVE))
+ {
diff --git a/SOURCES/glibc-upstream-2.34-137.patch b/SOURCES/glibc-upstream-2.34-137.patch
new file mode 100644
index 0000000..06aaa91
--- /dev/null
+++ b/SOURCES/glibc-upstream-2.34-137.patch
@@ -0,0 +1,241 @@
+commit c6df39a0bd2aafd2a4280a0000ef201f30273bee
+Author: Adhemerval Zanella
+Date: Mon Oct 11 16:01:49 2021 -0300
+
+ elf: Fix elf_get_dynamic_info definition
+
+ Before 490e6c62aa31a8a ('elf: Avoid nested functions in the loader
+ [BZ #27220]'), elf_get_dynamic_info() was defined twice in rtld.c: on
+ the first dynamic-link.h include and later within _dl_start(). The
+ former definition did not define DONT_USE_BOOTSTRAP_MAP and is used
+ by setup_vdso() (since it is a global definition), while the latter
+ does define DONT_USE_BOOTSTRAP_MAP and is used for loader
+ self-relocation.
+
+ With that commit, the function is now included and defined once
+ instead of being defined as a nested function. So rtld.c defines it
+ without defining RTLD_BOOTSTRAP, which breaks at least powerpc32.
+
+ This patch fixes that by moving the get-dynamic-info.h include out
+ of dynamic-link.h, so that each caller can correctly set the expected
+ semantics by defining STATIC_PIE_BOOTSTRAP, RTLD_BOOTSTRAP, and/or
+ RESOLVE_MAP.
+
+ It also requires enabling some asserts only for the loader bootstrap
+ to avoid issues when called from setup_vdso().
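+
+ As a minimal illustration (hypothetical names, not the actual glibc
+ code), a header-defined function changes meaning with the macros
+ visible at its include point, which is why the include now lives
+ with the callers that know the right macros:
+
+   /* mode.h -- stands in for get-dynamic-info.h.  */
+   static inline int
+   get_mode (void)
+   {
+   #ifdef BOOTSTRAP   /* Set by the includer, like RTLD_BOOTSTRAP.  */
+     return 1;        /* Bootstrap semantics.  */
+   #else
+     return 0;        /* Normal semantics.  */
+   #endif
+   }
+
+ Two files including mode.h under different macros get two different
+ functions behind one name; the old nested-function setup hid that
+ distinction.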
+
+ As a side note, this is another issue with nested functions: it is
+ not clear from the pre-processed output (-E -dD) how the function
+ will be built and what its semantics are (since a nested function is
+ local and extra C defines may change it).
+
+ I checked on x86_64-linux-gnu (w/o --enable-static-pie),
+ i686-linux-gnu, powerpc64-linux-gnu, powerpc-linux-gnu-power4,
+ aarch64-linux-gnu, arm-linux-gnu, sparc64-linux-gnu, and
+ s390x-linux-gnu.
+
+ Reviewed-by: Fangrui Song
+ (cherry picked from commit 4af6982e4c9fc465ffb7a54b794aaaa134241f05)
+
+ Resolved conflicts:
+ elf/rtld.c
+
+diff --git a/elf/dl-conflict.c b/elf/dl-conflict.c
+index 5c8e51d19ae095d6..d54356dee3f86ae0 100644
+--- a/elf/dl-conflict.c
++++ b/elf/dl-conflict.c
+@@ -17,6 +17,7 @@
+ License along with the GNU C Library; see the file COPYING.LIB. If
+ not, see . */
+
++#include 
+ #include 
+ #include 
+ #include 
+diff --git a/elf/dl-load.c b/elf/dl-load.c
+index 0976977fbdf21902..eea06629a978aaf3 100644
+--- a/elf/dl-load.c
++++ b/elf/dl-load.c
+@@ -58,6 +58,7 @@ struct filebuf
+ };
+
+ #include "dynamic-link.h"
++#include "get-dynamic-info.h"
+ #include 
+ #include 
+ #include 
+@@ -1295,7 +1296,7 @@ _dl_map_object_from_fd (const char *name, const char *origname, int fd,
+ if (l->l_ld != 0)
+ l->l_ld = (ElfW(Dyn) *) ((ElfW(Addr)) l->l_ld + l->l_addr);
+
+- elf_get_dynamic_info (l);
++ elf_get_dynamic_info (l, false);
+
+ /* Make sure we are not dlopen'ing an object that has the
+ DF_1_NOOPEN flag set, or a PIE object. */
+diff --git a/elf/dl-reloc-static-pie.c b/elf/dl-reloc-static-pie.c
+index a52ba8aeb8b573cb..f323b4dd0d5ba279 100644
+--- a/elf/dl-reloc-static-pie.c
++++ b/elf/dl-reloc-static-pie.c
+@@ -28,6 +28,7 @@
+ #define STATIC_PIE_BOOTSTRAP
+ #define RESOLVE_MAP(map, scope, sym, version, flags) map
+ #include "dynamic-link.h"
++#include "get-dynamic-info.h"
+
+ /* Relocate static executable with PIE. */
+
+@@ -51,7 +52,7 @@ _dl_relocate_static_pie (void)
+ break;
+ }
+
+- elf_get_dynamic_info (main_map);
++ elf_get_dynamic_info (main_map, false);
+
+ # ifdef ELF_MACHINE_BEFORE_RTLD_RELOC
+ ELF_MACHINE_BEFORE_RTLD_RELOC (main_map, main_map->l_info);
+diff --git a/elf/dl-runtime.c b/elf/dl-runtime.c
+index 9d0d941000f6114f..61c260ddb81b586c 100644
+--- a/elf/dl-runtime.c
++++ b/elf/dl-runtime.c
+@@ -19,6 +19,7 @@
+ #define IN_DL_RUNTIME 1 /* This can be tested in dl-machine.h. */
+
+ #include 
++#include 
+ #include 
+ #include 
+ #include 
+diff --git a/elf/dynamic-link.h b/elf/dynamic-link.h
+index 7cc30211649d3820..21cdfc88bbfb89ea 100644
+--- a/elf/dynamic-link.h
++++ b/elf/dynamic-link.h
+@@ -93,7 +93,6 @@ elf_machine_lazy_rel (struct link_map *map, struct r_scope_elem *scope[],
+
+ #include 
+
+-#include "get-dynamic-info.h"
+
+ #ifdef RESOLVE_MAP
+
+diff --git a/elf/get-dynamic-info.h b/elf/get-dynamic-info.h
+index 15c316b38c05a90c..d169099fbc9897cf 100644
+--- a/elf/get-dynamic-info.h
++++ b/elf/get-dynamic-info.h
+@@ -25,7 +25,7 @@
+ #include 
+
+ static inline void __attribute__ ((unused, always_inline))
+-elf_get_dynamic_info (struct link_map *l)
++elf_get_dynamic_info (struct link_map *l, bool check)
+ {
+ #if __ELF_NATIVE_CLASS == 32
+ typedef Elf32_Word d_tag_utype;
+@@ -112,16 +112,19 @@ elf_get_dynamic_info (struct link_map *l)
+ assert (info[DT_RELENT]->d_un.d_val == sizeof (ElfW(Rel)));
+ #endif
+ #ifdef RTLD_BOOTSTRAP
+- /* Only the bind now flags are allowed.
*/ +- assert (info[VERSYMIDX (DT_FLAGS_1)] == NULL +- || (info[VERSYMIDX (DT_FLAGS_1)]->d_un.d_val & ~DF_1_NOW) == 0); +- /* Flags must not be set for ld.so. */ +- assert (info[DT_FLAGS] == NULL +- || (info[DT_FLAGS]->d_un.d_val & ~DF_BIND_NOW) == 0); +-#endif +-#if defined RTLD_BOOTSTRAP || defined STATIC_PIE_BOOTSTRAP +- assert (info[DT_RUNPATH] == NULL); +- assert (info[DT_RPATH] == NULL); ++ if (check) ++ { ++ /* Only the bind now flags are allowed. */ ++ assert (info[VERSYMIDX (DT_FLAGS_1)] == NULL ++ || (info[VERSYMIDX (DT_FLAGS_1)]->d_un.d_val & ~DF_1_NOW) == 0); ++ /* Flags must not be set for ld.so. */ ++ assert (info[DT_FLAGS] == NULL ++ || (info[DT_FLAGS]->d_un.d_val & ~DF_BIND_NOW) == 0); ++# ifdef STATIC_PIE_BOOTSTRAP ++ assert (info[DT_RUNPATH] == NULL); ++ assert (info[DT_RPATH] == NULL); ++# endif ++ } + #else + if (info[DT_FLAGS] != NULL) + { +diff --git a/elf/rtld.c b/elf/rtld.c +index ee45657aeac14f3c..352d596dedb42e79 100644 +--- a/elf/rtld.c ++++ b/elf/rtld.c +@@ -514,6 +514,7 @@ _dl_start_final (void *arg, struct dl_start_final_info *info) + is trivial: always the map of ld.so itself. */ + #define RTLD_BOOTSTRAP + #define RESOLVE_MAP(map, scope, sym, version, flags) map ++#include "get-dynamic-info.h" + #include "dynamic-link.h" + + static ElfW(Addr) __attribute_used__ +@@ -549,7 +550,7 @@ _dl_start (void *arg) + /* Read our own dynamic section and fill in the info array. */ + bootstrap_map.l_ld = (void *) bootstrap_map.l_addr + elf_machine_dynamic (); + bootstrap_map.l_ld_readonly = DL_RO_DYN_SECTION; +- elf_get_dynamic_info (&bootstrap_map); ++ elf_get_dynamic_info (&bootstrap_map, true); + + #if NO_TLS_OFFSET != 0 + bootstrap_map.l_tls_offset = NO_TLS_OFFSET; +@@ -1653,7 +1654,7 @@ dl_main (const ElfW(Phdr) *phdr, + if (! rtld_is_main) + { + /* Extract the contents of the dynamic section for easy access. */ +- elf_get_dynamic_info (main_map); ++ elf_get_dynamic_info (main_map, false); + + /* If the main map is libc.so, update the base namespace to + refer to this map. 
If libc.so is loaded later, this happens
+diff --git a/elf/setup-vdso.h b/elf/setup-vdso.h
+index f44748bc9858e5fd..6fdffafcca5e9916 100644
+--- a/elf/setup-vdso.h
++++ b/elf/setup-vdso.h
+@@ -64,7 +64,7 @@ setup_vdso (struct link_map *main_map __attribute__ ((unused)),
+       l->l_map_end += l->l_addr;
+       l->l_text_end += l->l_addr;
+       l->l_ld = (void *) ((ElfW(Addr)) l->l_ld + l->l_addr);
+-      elf_get_dynamic_info (l);
++      elf_get_dynamic_info (l, false);
+       _dl_setup_hash (l);
+       l->l_relocated = 1;
+
+diff --git a/sysdeps/arm/dl-machine.h b/sysdeps/arm/dl-machine.h
+index 7e6761bbe87540d5..86f866ca7c17bd9b 100644
+--- a/sysdeps/arm/dl-machine.h
++++ b/sysdeps/arm/dl-machine.h
+@@ -21,6 +21,7 @@
+
+ #define ELF_MACHINE_NAME "ARM"
+
++#include
+ #include
+ #include
+ #include
+diff --git a/sysdeps/i386/dl-machine.h b/sysdeps/i386/dl-machine.h
+index 78ce890c0ff333ca..fa902612ca8557f9 100644
+--- a/sysdeps/i386/dl-machine.h
++++ b/sysdeps/i386/dl-machine.h
+@@ -21,6 +21,7 @@
+
+ #define ELF_MACHINE_NAME "i386"
+
++#include
+ #include
+ #include
+ #include
+diff --git a/sysdeps/x86_64/dl-machine.h b/sysdeps/x86_64/dl-machine.h
+index d3fcbb37bf1f4f7c..90c77cfea1de8d63 100644
+--- a/sysdeps/x86_64/dl-machine.h
++++ b/sysdeps/x86_64/dl-machine.h
+@@ -22,6 +22,7 @@
+
+ #define ELF_MACHINE_NAME "x86_64"
+
++#include
+ #include
+ #include
+ #include
diff --git a/SOURCES/glibc-upstream-2.34-138.patch b/SOURCES/glibc-upstream-2.34-138.patch
new file mode 100644
index 0000000..778ada9
--- /dev/null
+++ b/SOURCES/glibc-upstream-2.34-138.patch
@@ -0,0 +1,1115 @@
+commit b868b45f6763a4adc4aa93248be9f84480768fcf
+Author: Adhemerval Zanella
+Date: Wed Oct 13 09:49:34 2021 -0300
+
+    elf: Fix dynamic-link.h usage on rtld.c
+
+    The 4af6982e4c fix does not fully handle RTLD_BOOTSTRAP usage in
+    rtld.c, due to two issues:
+
+    1. RTLD_BOOTSTRAP is also used in dl-machine.h on various
+       architectures, where it changes the semantics of various machine
+       relocation functions.
+
+    2. The elf_get_dynamic_info() change was done sideways: prior to
+       490e6c62aa, get-dynamic-info.h was included by the first
+       dynamic-link.h include *without* RTLD_BOOTSTRAP being defined.
+       This means that the code within elf_get_dynamic_info() that uses
+       RTLD_BOOTSTRAP is in fact unused.
+
+    To fix 1., this patch now includes dynamic-link.h only once, with
+    RTLD_BOOTSTRAP defined.  The ELF_DYNAMIC_RELOCATE call will now use
+    the relocation functions with the expected semantics for the loader.
+
+    And to fix 2., part of 4af6982e4c is reverted (the check argument of
+    elf_get_dynamic_info() is not required) and the RTLD_BOOTSTRAP
+    pieces are removed.
+
+    To reorganize the includes, the static TLS definitions are moved to
+    their own header to avoid a circular dependency (they are defined in
+    dynamic-link.h and dl-machine.h requires them, while at the same
+    time other dynamic-link.h definitions require dl-machine.h
+    definitions).
+
+    Also, ELF_MACHINE_NO_REL, ELF_MACHINE_NO_RELA, and
+    ELF_MACHINE_PLT_REL are moved to their own header.  Only ancient
+    ABIs need special values (arm, i386, and mips), so a generic default
+    is used.
+
+    The powerpc Elf64_FuncDesc is also moved to its own header, since
+    csu code requires its definition (which would otherwise mean either
+    adding the elf/ folder to the include path or using a full path with
+    elf/).
+
+    Checked on x86_64, i686, aarch64, armhf, powerpc64, powerpc32,
+    and powerpc64le.
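+
+    As a condensed sketch (not the verbatim headers), the REL/RELA
+    defaults and the ancient-ABI overrides now look like:
+
+        /* sysdeps/generic/dl-machine-rel.h: modern ABIs are Rela-only.  */
+        #define ELF_MACHINE_NO_REL 1
+        #define ELF_MACHINE_NO_RELA 0
+
+        /* sysdeps/arm and sysdeps/i386 dl-machine-rel.h: REL-based,
+           except that prelinked libraries may use Rela; the PLT also
+           uses REL.  */
+        #define ELF_MACHINE_NO_RELA defined RTLD_BOOTSTRAP
+        #define ELF_MACHINE_NO_REL 0
+        #define ELF_MACHINE_PLT_REL 1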
+ + Reviewed-by: Szabolcs Nagy + (cherry picked from commit d6d89608ac8cf2b37c75debad1fff653f6939f90) + + Resolved conflicts: + elf/rtld.c + +Conflicts: + elf/rtld.c - Manual merge around dl-execve.h include. + +diff --git a/elf/dl-load.c b/elf/dl-load.c +index eea06629a978aaf3..fb3da5aa565908a6 100644 +--- a/elf/dl-load.c ++++ b/elf/dl-load.c +@@ -1296,7 +1296,7 @@ _dl_map_object_from_fd (const char *name, const char *origname, int fd, + if (l->l_ld != 0) + l->l_ld = (ElfW(Dyn) *) ((ElfW(Addr)) l->l_ld + l->l_addr); + +- elf_get_dynamic_info (l, false); ++ elf_get_dynamic_info (l); + + /* Make sure we are not dlopen'ing an object that has the + DF_1_NOOPEN flag set, or a PIE object. */ +diff --git a/elf/dl-reloc-static-pie.c b/elf/dl-reloc-static-pie.c +index f323b4dd0d5ba279..ababafcf98f9945d 100644 +--- a/elf/dl-reloc-static-pie.c ++++ b/elf/dl-reloc-static-pie.c +@@ -52,7 +52,7 @@ _dl_relocate_static_pie (void) + break; + } + +- elf_get_dynamic_info (main_map, false); ++ elf_get_dynamic_info (main_map); + + # ifdef ELF_MACHINE_BEFORE_RTLD_RELOC + ELF_MACHINE_BEFORE_RTLD_RELOC (main_map, main_map->l_info); +diff --git a/elf/dl-static-tls.h b/elf/dl-static-tls.h +new file mode 100644 +index 0000000000000000..730924fc0155acb7 +--- /dev/null ++++ b/elf/dl-static-tls.h +@@ -0,0 +1,51 @@ ++/* Inline functions for dynamic linking. ++ Copyright (C) 1995-2021 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#ifndef _DL_STATIC_TLS_H ++#define _DL_STATIC_TLS_H ++ ++/* This macro is used as a callback from elf_machine_rel{a,} when a ++ static TLS reloc is about to be performed. Since (in dl-load.c) we ++ permit dynamic loading of objects that might use such relocs, we ++ have to check whether each use is actually doable. If the object ++ whose TLS segment the reference resolves to was allocated space in ++ the static TLS block at startup, then it's ok. Otherwise, we make ++ an attempt to allocate it in surplus space on the fly. If that ++ can't be done, we fall back to the error that DF_STATIC_TLS is ++ intended to produce. 
*/ ++#define HAVE_STATIC_TLS(map, sym_map) \ ++ (__builtin_expect ((sym_map)->l_tls_offset != NO_TLS_OFFSET \ ++ && ((sym_map)->l_tls_offset \ ++ != FORCED_DYNAMIC_TLS_OFFSET), 1)) ++ ++#define CHECK_STATIC_TLS(map, sym_map) \ ++ do { \ ++ if (!HAVE_STATIC_TLS (map, sym_map)) \ ++ _dl_allocate_static_tls (sym_map); \ ++ } while (0) ++ ++#define TRY_STATIC_TLS(map, sym_map) \ ++ (__builtin_expect ((sym_map)->l_tls_offset \ ++ != FORCED_DYNAMIC_TLS_OFFSET, 1) \ ++ && (__builtin_expect ((sym_map)->l_tls_offset != NO_TLS_OFFSET, 1) \ ++ || _dl_try_allocate_static_tls (sym_map, true) == 0)) ++ ++int _dl_try_allocate_static_tls (struct link_map *map, bool optional) ++ attribute_hidden; ++ ++#endif +diff --git a/elf/dynamic-link.h b/elf/dynamic-link.h +index 21cdfc88bbfb89ea..ac4cc70dea3da763 100644 +--- a/elf/dynamic-link.h ++++ b/elf/dynamic-link.h +@@ -16,35 +16,7 @@ + License along with the GNU C Library; if not, see + . */ + +-/* This macro is used as a callback from elf_machine_rel{a,} when a +- static TLS reloc is about to be performed. Since (in dl-load.c) we +- permit dynamic loading of objects that might use such relocs, we +- have to check whether each use is actually doable. If the object +- whose TLS segment the reference resolves to was allocated space in +- the static TLS block at startup, then it's ok. Otherwise, we make +- an attempt to allocate it in surplus space on the fly. If that +- can't be done, we fall back to the error that DF_STATIC_TLS is +- intended to produce. */ +-#define HAVE_STATIC_TLS(map, sym_map) \ +- (__builtin_expect ((sym_map)->l_tls_offset != NO_TLS_OFFSET \ +- && ((sym_map)->l_tls_offset \ +- != FORCED_DYNAMIC_TLS_OFFSET), 1)) +- +-#define CHECK_STATIC_TLS(map, sym_map) \ +- do { \ +- if (!HAVE_STATIC_TLS (map, sym_map)) \ +- _dl_allocate_static_tls (sym_map); \ +- } while (0) +- +-#define TRY_STATIC_TLS(map, sym_map) \ +- (__builtin_expect ((sym_map)->l_tls_offset \ +- != FORCED_DYNAMIC_TLS_OFFSET, 1) \ +- && (__builtin_expect ((sym_map)->l_tls_offset != NO_TLS_OFFSET, 1) \ +- || _dl_try_allocate_static_tls (sym_map, true) == 0)) +- +-int _dl_try_allocate_static_tls (struct link_map *map, bool optional) +- attribute_hidden; +- ++#include + #include + + #ifdef RESOLVE_MAP +@@ -91,9 +63,6 @@ elf_machine_lazy_rel (struct link_map *map, struct r_scope_elem *scope[], + # endif + #endif + +-#include +- +- + #ifdef RESOLVE_MAP + + # if defined RTLD_BOOTSTRAP || defined STATIC_PIE_BOOTSTRAP +diff --git a/elf/get-dynamic-info.h b/elf/get-dynamic-info.h +index d169099fbc9897cf..1ac0663d1ff5de24 100644 +--- a/elf/get-dynamic-info.h ++++ b/elf/get-dynamic-info.h +@@ -22,10 +22,11 @@ + #define _GET_DYNAMIC_INFO_H + + #include ++#include + #include + + static inline void __attribute__ ((unused, always_inline)) +-elf_get_dynamic_info (struct link_map *l, bool check) ++elf_get_dynamic_info (struct link_map *l) + { + #if __ELF_NATIVE_CLASS == 32 + typedef Elf32_Word d_tag_utype; +@@ -33,7 +34,7 @@ elf_get_dynamic_info (struct link_map *l, bool check) + typedef Elf64_Xword d_tag_utype; + #endif + +-#if !defined RTLD_BOOTSTRAP && !defined STATIC_PIE_BOOTSTRAP ++#ifndef STATIC_PIE_BOOTSTRAP + if (l->l_ld == NULL) + return; + #endif +@@ -111,21 +112,10 @@ elf_get_dynamic_info (struct link_map *l, bool check) + if (info[DT_REL] != NULL) + assert (info[DT_RELENT]->d_un.d_val == sizeof (ElfW(Rel))); + #endif +-#ifdef RTLD_BOOTSTRAP +- if (check) +- { +- /* Only the bind now flags are allowed. 
*/ +- assert (info[VERSYMIDX (DT_FLAGS_1)] == NULL +- || (info[VERSYMIDX (DT_FLAGS_1)]->d_un.d_val & ~DF_1_NOW) == 0); +- /* Flags must not be set for ld.so. */ +- assert (info[DT_FLAGS] == NULL +- || (info[DT_FLAGS]->d_un.d_val & ~DF_BIND_NOW) == 0); +-# ifdef STATIC_PIE_BOOTSTRAP +- assert (info[DT_RUNPATH] == NULL); +- assert (info[DT_RPATH] == NULL); +-# endif +- } +-#else ++#ifdef STATIC_PIE_BOOTSTRAP ++ assert (info[DT_RUNPATH] == NULL); ++ assert (info[DT_RPATH] == NULL); ++#endif + if (info[DT_FLAGS] != NULL) + { + /* Flags are used. Translate to the old form where available. +@@ -163,7 +153,6 @@ elf_get_dynamic_info (struct link_map *l, bool check) + if (info[DT_RUNPATH] != NULL) + /* If both RUNPATH and RPATH are given, the latter is ignored. */ + info[DT_RPATH] = NULL; +-#endif + } + + #endif +diff --git a/elf/rtld.c b/elf/rtld.c +index 352d596dedb42e79..37d28d5a66d7b5d6 100644 +--- a/elf/rtld.c ++++ b/elf/rtld.c +@@ -32,7 +32,6 @@ + #include + #include + #include +-#include "dynamic-link.h" + #include + #include + #include +@@ -51,9 +50,18 @@ + #include + #include + #include ++#include + + #include + ++/* This #define produces dynamic linking inline functions for ++ bootstrap relocation instead of general-purpose relocation. ++ Since ld.so must not have any undefined symbols the result ++ is trivial: always the map of ld.so itself. */ ++#define RTLD_BOOTSTRAP ++#define RESOLVE_MAP(map, scope, sym, version, flags) map ++#include "dynamic-link.h" ++ + /* Only enables rtld profiling for architectures which provides non generic + hp-timing support. The generic support requires either syscall + (clock_gettime), which will incur in extra overhead on loading time. +@@ -508,15 +516,6 @@ _dl_start_final (void *arg, struct dl_start_final_info *info) + # define bootstrap_map info.l + #endif + +- /* This #define produces dynamic linking inline functions for +- bootstrap relocation instead of general-purpose relocation. +- Since ld.so must not have any undefined symbols the result +- is trivial: always the map of ld.so itself. */ +-#define RTLD_BOOTSTRAP +-#define RESOLVE_MAP(map, scope, sym, version, flags) map +-#include "get-dynamic-info.h" +-#include "dynamic-link.h" +- + static ElfW(Addr) __attribute_used__ + _dl_start (void *arg) + { +@@ -550,7 +549,7 @@ _dl_start (void *arg) + /* Read our own dynamic section and fill in the info array. */ + bootstrap_map.l_ld = (void *) bootstrap_map.l_addr + elf_machine_dynamic (); + bootstrap_map.l_ld_readonly = DL_RO_DYN_SECTION; +- elf_get_dynamic_info (&bootstrap_map, true); ++ elf_get_dynamic_info (&bootstrap_map); + + #if NO_TLS_OFFSET != 0 + bootstrap_map.l_tls_offset = NO_TLS_OFFSET; +@@ -1654,7 +1653,7 @@ dl_main (const ElfW(Phdr) *phdr, + if (! rtld_is_main) + { + /* Extract the contents of the dynamic section for easy access. */ +- elf_get_dynamic_info (main_map, false); ++ elf_get_dynamic_info (main_map); + + /* If the main map is libc.so, update the base namespace to + refer to this map. 
If libc.so is loaded later, this happens +diff --git a/elf/setup-vdso.h b/elf/setup-vdso.h +index 6fdffafcca5e9916..f44748bc9858e5fd 100644 +--- a/elf/setup-vdso.h ++++ b/elf/setup-vdso.h +@@ -64,7 +64,7 @@ setup_vdso (struct link_map *main_map __attribute__ ((unused)), + l->l_map_end += l->l_addr; + l->l_text_end += l->l_addr; + l->l_ld = (void *) ((ElfW(Addr)) l->l_ld + l->l_addr); +- elf_get_dynamic_info (l, false); ++ elf_get_dynamic_info (l); + _dl_setup_hash (l); + l->l_relocated = 1; + +diff --git a/sysdeps/aarch64/dl-machine.h b/sysdeps/aarch64/dl-machine.h +index 34c0790b893a529b..07af183e711b50f2 100644 +--- a/sysdeps/aarch64/dl-machine.h ++++ b/sysdeps/aarch64/dl-machine.h +@@ -24,7 +24,9 @@ + #include + #include + #include ++#include + #include ++#include + #include + + /* Translate a processor specific dynamic tag to the index in l_info array. */ +@@ -196,10 +198,6 @@ _dl_start_user: \n\ + + #define ELF_MACHINE_JMP_SLOT AARCH64_R(JUMP_SLOT) + +-/* AArch64 uses RELA not REL */ +-#define ELF_MACHINE_NO_REL 1 +-#define ELF_MACHINE_NO_RELA 0 +- + #define DL_PLATFORM_INIT dl_platform_init () + + static inline void __attribute__ ((unused)) +@@ -376,7 +374,7 @@ elf_machine_rela (struct link_map *map, struct r_scope_elem *scope[], + } + } + +-inline void ++static inline void + __attribute__ ((always_inline)) + elf_machine_rela_relative (ElfW(Addr) l_addr, + const ElfW(Rela) *reloc, +diff --git a/sysdeps/alpha/dl-machine.h b/sysdeps/alpha/dl-machine.h +index 66e1db524bb378f6..e948e54fb7223a18 100644 +--- a/sysdeps/alpha/dl-machine.h ++++ b/sysdeps/alpha/dl-machine.h +@@ -26,6 +26,8 @@ + #define ELF_MACHINE_NAME "alpha" + + #include ++#include ++#include + + + /* Mask identifying addresses reserved for the user program, +@@ -241,10 +243,6 @@ $fixup_stack: \n\ + /* A reloc type used for ld.so cmdline arg lookups to reject PLT entries. */ + #define ELF_MACHINE_JMP_SLOT R_ALPHA_JMP_SLOT + +-/* The alpha never uses Elf64_Rel relocations. */ +-#define ELF_MACHINE_NO_REL 1 +-#define ELF_MACHINE_NO_RELA 0 +- + /* We define an initialization functions. This is called very early in + * _dl_sysdep_start. */ + #define DL_PLATFORM_INIT dl_platform_init () +diff --git a/sysdeps/arc/dl-machine.h b/sysdeps/arc/dl-machine.h +index 4b64ffec256b7f3b..f843ed9bd6ff5fc2 100644 +--- a/sysdeps/arc/dl-machine.h ++++ b/sysdeps/arc/dl-machine.h +@@ -30,6 +30,8 @@ + #include + #include + #include ++#include ++#include + + /* Dynamic Linking ABI for ARCv2 ISA. + +@@ -203,10 +205,6 @@ __start: \n\ + /* A reloc type used for ld.so cmdline arg lookups to reject PLT entries. */ + #define ELF_MACHINE_JMP_SLOT R_ARC_JUMP_SLOT + +-/* ARC uses Rela relocations. */ +-#define ELF_MACHINE_NO_REL 1 +-#define ELF_MACHINE_NO_RELA 0 +- + /* Fixup a PLT entry to bounce directly to the function at VALUE. 
*/ + + static inline ElfW(Addr) +@@ -318,7 +316,7 @@ elf_machine_rela (struct link_map *map, struct r_scope_elem *scope[], + } + } + +-inline void ++static inline void + __attribute__ ((always_inline)) + elf_machine_rela_relative (ElfW(Addr) l_addr, const ElfW(Rela) *reloc, + void *const reloc_addr_arg) +@@ -327,7 +325,7 @@ elf_machine_rela_relative (ElfW(Addr) l_addr, const ElfW(Rela) *reloc, + *reloc_addr += l_addr; + } + +-inline void ++static inline void + __attribute__ ((always_inline)) + elf_machine_lazy_rel (struct link_map *map, struct r_scope_elem *scope[], + ElfW(Addr) l_addr, const ElfW(Rela) *reloc, +diff --git a/sysdeps/arm/dl-machine-rel.h b/sysdeps/arm/dl-machine-rel.h +new file mode 100644 +index 0000000000000000..bec114706cd027a4 +--- /dev/null ++++ b/sysdeps/arm/dl-machine-rel.h +@@ -0,0 +1,31 @@ ++/* ELF dynamic relocation type supported by the architecture. ARM version. ++ Copyright (C) 2001-2021 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#ifndef _DL_MACHINE_REL_H ++#define _DL_MACHINE_REL_H ++ ++/* ARM never uses Elf32_Rela relocations for the dynamic linker. ++ Prelinked libraries may use Elf32_Rela though. */ ++#define ELF_MACHINE_NO_RELA defined RTLD_BOOTSTRAP ++#define ELF_MACHINE_NO_REL 0 ++ ++/* ARM never uses Elf32_Rela relocations for the dynamic linker. ++ Prelinked libraries may use Elf32_Rela though. */ ++#define ELF_MACHINE_PLT_REL 1 ++ ++#endif +diff --git a/sysdeps/arm/dl-machine.h b/sysdeps/arm/dl-machine.h +index 86f866ca7c17bd9b..3239841eb5b36623 100644 +--- a/sysdeps/arm/dl-machine.h ++++ b/sysdeps/arm/dl-machine.h +@@ -26,6 +26,8 @@ + #include + #include + #include ++#include ++#include + + #ifndef CLEAR_CACHE + # error CLEAR_CACHE definition required to handle TEXTREL +@@ -258,10 +260,6 @@ _dl_start_user:\n\ + /* A reloc type used for ld.so cmdline arg lookups to reject PLT entries. */ + #define ELF_MACHINE_JMP_SLOT R_ARM_JUMP_SLOT + +-/* ARM never uses Elf32_Rela relocations for the dynamic linker. +- Prelinked libraries may use Elf32_Rela though. */ +-#define ELF_MACHINE_PLT_REL 1 +- + /* We define an initialization functions. This is called very early in + _dl_sysdep_start. */ + #define DL_PLATFORM_INIT dl_platform_init () +@@ -294,11 +292,6 @@ elf_machine_plt_value (struct link_map *map, const Elf32_Rel *reloc, + #endif /* !dl_machine_h */ + + +-/* ARM never uses Elf32_Rela relocations for the dynamic linker. +- Prelinked libraries may use Elf32_Rela though. */ +-#define ELF_MACHINE_NO_RELA defined RTLD_BOOTSTRAP +-#define ELF_MACHINE_NO_REL 0 +- + /* Names of the architecture-specific auditing callback functions. 
*/ + #define ARCH_LA_PLTENTER arm_gnu_pltenter + #define ARCH_LA_PLTEXIT arm_gnu_pltexit +diff --git a/sysdeps/csky/dl-machine.h b/sysdeps/csky/dl-machine.h +index ec22f875772b1291..4dfd9578773f1c8e 100644 +--- a/sysdeps/csky/dl-machine.h ++++ b/sysdeps/csky/dl-machine.h +@@ -24,6 +24,8 @@ + #include + #include + #include ++#include ++#include + + /* Return nonzero if ELF header is compatible with the running host. */ + static inline int +@@ -172,10 +174,6 @@ _dl_start_user:\n\ + /* A reloc type used for ld.so cmdline arg lookups to reject PLT entries. */ + #define ELF_MACHINE_JMP_SLOT R_CKCORE_JUMP_SLOT + +-/* C-SKY never uses Elf32_Rel relocations. */ +-#define ELF_MACHINE_NO_REL 1 +-#define ELF_MACHINE_NO_RELA 0 +- + /* We define an initialization functions. This is called very early in + _dl_sysdep_start. */ + #define DL_PLATFORM_INIT dl_platform_init () +diff --git a/sysdeps/generic/dl-machine-rel.h b/sysdeps/generic/dl-machine-rel.h +new file mode 100644 +index 0000000000000000..9167a1dffc715704 +--- /dev/null ++++ b/sysdeps/generic/dl-machine-rel.h +@@ -0,0 +1,27 @@ ++/* ELF dynamic relocation type supported by the architecture. ++ Copyright (C) 2001-2021 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#ifndef _DL_MACHINE_REL_H ++#define _DL_MACHINE_REL_H ++ ++/* Defined if the architecture supports Elf{32,64}_Rel relocations. */ ++#define ELF_MACHINE_NO_REL 1 ++/* Defined if the architecture supports Elf{32,64}_Rela relocations. */ ++#define ELF_MACHINE_NO_RELA 0 ++ ++#endif +diff --git a/sysdeps/generic/dl-machine.h b/sysdeps/generic/dl-machine.h +index 4a4ab4fc70ff1cf1..7da695d9030b000e 100644 +--- a/sysdeps/generic/dl-machine.h ++++ b/sysdeps/generic/dl-machine.h +@@ -20,6 +20,8 @@ + + #include + #include ++#include ++#include + + + /* Return nonzero iff ELF header is compatible with the running host. */ +diff --git a/sysdeps/hppa/dl-machine.h b/sysdeps/hppa/dl-machine.h +index 088931f67065250c..ac66f044189edd18 100644 +--- a/sysdeps/hppa/dl-machine.h ++++ b/sysdeps/hppa/dl-machine.h +@@ -31,6 +31,8 @@ + #include + #include + #include ++#include ++#include + + /* These two definitions must match the definition of the stub in + bfd/elf32-hppa.c (see plt_stub[]). +@@ -525,10 +527,6 @@ asm ( \ + #define ELF_MACHINE_JMP_SLOT R_PARISC_IPLT + #define ELF_MACHINE_SIZEOF_JMP_SLOT PLT_ENTRY_SIZE + +-/* We only use RELA. */ +-#define ELF_MACHINE_NO_REL 1 +-#define ELF_MACHINE_NO_RELA 0 +- + /* Return the address of the entry point. */ + #define ELF_MACHINE_START_ADDRESS(map, start) \ + ({ \ +diff --git a/sysdeps/i386/dl-machine-rel.h b/sysdeps/i386/dl-machine-rel.h +new file mode 100644 +index 0000000000000000..7ac46f78a69fbf98 +--- /dev/null ++++ b/sysdeps/i386/dl-machine-rel.h +@@ -0,0 +1,31 @@ ++/* ELF dynamic relocation type supported by the architecture. ARM version. 
++ Copyright (C) 2001-2021 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#ifndef _DL_MACHINE_REL_H ++#define _DL_MACHINE_REL_H ++ ++/* The i386 never uses Elf32_Rela relocations for the dynamic linker. ++ Prelinked libraries may use Elf32_Rela though. */ ++#define ELF_MACHINE_NO_RELA defined RTLD_BOOTSTRAP ++#define ELF_MACHINE_NO_REL 0 ++ ++/* The i386 never uses Elf32_Rela relocations for the dynamic linker. ++ Prelinked libraries may use Elf32_Rela though. */ ++#define ELF_MACHINE_PLT_REL 1 ++ ++#endif +diff --git a/sysdeps/i386/dl-machine.h b/sysdeps/i386/dl-machine.h +index fa902612ca8557f9..c55c9a3d64bed1f2 100644 +--- a/sysdeps/i386/dl-machine.h ++++ b/sysdeps/i386/dl-machine.h +@@ -26,6 +26,8 @@ + #include + #include + #include ++#include ++#include + + /* Return nonzero iff ELF header is compatible with the running host. */ + static inline int __attribute__ ((unused)) +@@ -237,10 +239,6 @@ _dl_start_user:\n\ + /* A reloc type used for ld.so cmdline arg lookups to reject PLT entries. */ + #define ELF_MACHINE_JMP_SLOT R_386_JMP_SLOT + +-/* The i386 never uses Elf32_Rela relocations for the dynamic linker. +- Prelinked libraries may use Elf32_Rela though. */ +-#define ELF_MACHINE_PLT_REL 1 +- + /* We define an initialization functions. This is called very early in + _dl_sysdep_start. */ + #define DL_PLATFORM_INIT dl_platform_init () +@@ -283,11 +281,6 @@ elf_machine_plt_value (struct link_map *map, const Elf32_Rel *reloc, + + #endif /* !dl_machine_h */ + +-/* The i386 never uses Elf32_Rela relocations for the dynamic linker. +- Prelinked libraries may use Elf32_Rela though. */ +-#define ELF_MACHINE_NO_RELA defined RTLD_BOOTSTRAP +-#define ELF_MACHINE_NO_REL 0 +- + #ifdef RESOLVE_MAP + + /* Perform the relocation specified by RELOC and SYM (which is fully resolved). +diff --git a/sysdeps/ia64/dl-machine.h b/sysdeps/ia64/dl-machine.h +index 2217d0b556c17683..c9608a51b0291164 100644 +--- a/sysdeps/ia64/dl-machine.h ++++ b/sysdeps/ia64/dl-machine.h +@@ -27,6 +27,8 @@ + #include + #include + #include ++#include ++#include + + /* Translate a processor specific dynamic tag to the index + in l_info array. */ +@@ -319,10 +321,6 @@ elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[], + /* A reloc type used for ld.so cmdline arg lookups to reject PLT entries. */ + #define ELF_MACHINE_JMP_SLOT R_IA64_IPLTLSB + +-/* According to the IA-64 specific documentation, Rela is always used. */ +-#define ELF_MACHINE_NO_REL 1 +-#define ELF_MACHINE_NO_RELA 0 +- + /* Return the address of the entry point. 
*/ + #define ELF_MACHINE_START_ADDRESS(map, start) \ + ({ \ +diff --git a/sysdeps/m68k/dl-machine.h b/sysdeps/m68k/dl-machine.h +index 5e34c4784e348b19..30323d62d443645a 100644 +--- a/sysdeps/m68k/dl-machine.h ++++ b/sysdeps/m68k/dl-machine.h +@@ -24,6 +24,8 @@ + #include + #include + #include ++#include ++#include + + /* Return nonzero iff ELF header is compatible with the running host. */ + static inline int +@@ -183,10 +185,6 @@ _dl_start_user:\n\ + /* A reloc type used for ld.so cmdline arg lookups to reject PLT entries. */ + #define ELF_MACHINE_JMP_SLOT R_68K_JMP_SLOT + +-/* The m68k never uses Elf32_Rel relocations. */ +-#define ELF_MACHINE_NO_REL 1 +-#define ELF_MACHINE_NO_RELA 0 +- + static inline Elf32_Addr + elf_machine_fixup_plt (struct link_map *map, lookup_t t, + const ElfW(Sym) *refsym, const ElfW(Sym) *sym, +diff --git a/sysdeps/microblaze/dl-machine.h b/sysdeps/microblaze/dl-machine.h +index 3fd4988e6093be1c..b8cc5a7fe65af90a 100644 +--- a/sysdeps/microblaze/dl-machine.h ++++ b/sysdeps/microblaze/dl-machine.h +@@ -23,6 +23,8 @@ + + #include + #include ++#include ++#include + + /* Return nonzero iff ELF header is compatible with the running host. */ + static inline int +@@ -169,10 +171,6 @@ _dl_start_user:\n\ + /* A reloc type used for ld.so cmdline arg lookups to reject PLT entries. */ + #define ELF_MACHINE_JMP_SLOT R_MICROBLAZE_JUMP_SLOT + +-/* The microblaze never uses Elf32_Rel relocations. */ +-#define ELF_MACHINE_NO_REL 1 +-#define ELF_MACHINE_NO_RELA 0 +- + static inline Elf32_Addr + elf_machine_fixup_plt (struct link_map *map, lookup_t t, + const ElfW(Sym) *refsym, const ElfW(Sym) *sym, +diff --git a/sysdeps/mips/dl-machine-rel.h b/sysdeps/mips/dl-machine-rel.h +new file mode 100644 +index 0000000000000000..ed396180412bc723 +--- /dev/null ++++ b/sysdeps/mips/dl-machine-rel.h +@@ -0,0 +1,26 @@ ++/* ELF dynamic relocation type supported by the architecture. ARM version. ++ Copyright (C) 2001-2021 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#ifndef _DL_MACHINE_REL_H ++#define _DL_MACHINE_REL_H ++ ++#define ELF_MACHINE_PLT_REL 1 ++#define ELF_MACHINE_NO_REL 0 ++#define ELF_MACHINE_NO_RELA 0 ++ ++#endif +diff --git a/sysdeps/mips/dl-machine.h b/sysdeps/mips/dl-machine.h +index 7a821ceb8e518cef..45a394907a98be32 100644 +--- a/sysdeps/mips/dl-machine.h ++++ b/sysdeps/mips/dl-machine.h +@@ -33,6 +33,8 @@ + #include + #include + #include ++#include ++#include + + /* The offset of gp from GOT might be system-dependent. It's set by + ld. The same value is also */ +@@ -60,10 +62,6 @@ + ((((type) == ELF_MACHINE_JMP_SLOT) * ELF_RTYPE_CLASS_PLT) \ + | (((type) == R_MIPS_COPY) * ELF_RTYPE_CLASS_COPY)) + +-#define ELF_MACHINE_PLT_REL 1 +-#define ELF_MACHINE_NO_REL 0 +-#define ELF_MACHINE_NO_RELA 0 +- + /* Translate a processor specific dynamic tag to the index + in l_info array. 
*/ + #define DT_MIPS(x) (DT_MIPS_##x - DT_LOPROC + DT_NUM) +diff --git a/sysdeps/nios2/dl-machine.h b/sysdeps/nios2/dl-machine.h +index 4de602b13d5500f6..430ca5d7ae1e0372 100644 +--- a/sysdeps/nios2/dl-machine.h ++++ b/sysdeps/nios2/dl-machine.h +@@ -24,6 +24,8 @@ + #include + #include + #include ++#include ++#include + + /* Return nonzero iff ELF header is compatible with the running host. */ + static inline int +@@ -200,10 +202,6 @@ _start:\n\ + /* A reloc type used for ld.so cmdline arg lookups to reject PLT entries. */ + #define ELF_MACHINE_JMP_SLOT R_NIOS2_JUMP_SLOT + +-/* The Nios II never uses Elf32_Rel relocations. */ +-#define ELF_MACHINE_NO_REL 1 +-#define ELF_MACHINE_NO_RELA 0 +- + /* Fixup a PLT entry to bounce directly to the function at VALUE. */ + + static inline Elf32_Addr +diff --git a/sysdeps/powerpc/powerpc32/dl-machine.h b/sysdeps/powerpc/powerpc32/dl-machine.h +index cda012dc1b822254..8d062951ce0abd69 100644 +--- a/sysdeps/powerpc/powerpc32/dl-machine.h ++++ b/sysdeps/powerpc/powerpc32/dl-machine.h +@@ -25,6 +25,8 @@ + #include + #include + #include ++#include ++#include + + /* Translate a processor specific dynamic tag to the index + in l_info array. */ +@@ -145,10 +147,6 @@ __elf_preferred_address(struct link_map *loader, size_t maplength, + /* A reloc type used for ld.so cmdline arg lookups to reject PLT entries. */ + #define ELF_MACHINE_JMP_SLOT R_PPC_JMP_SLOT + +-/* The PowerPC never uses REL relocations. */ +-#define ELF_MACHINE_NO_REL 1 +-#define ELF_MACHINE_NO_RELA 0 +- + /* We define an initialization function to initialize HWCAP/HWCAP2 and + platform data so it can be copied into the TCB later. This is called + very early in _dl_sysdep_start for dynamically linked binaries. */ +diff --git a/sysdeps/powerpc/powerpc64/dl-funcdesc.h b/sysdeps/powerpc/powerpc64/dl-funcdesc.h +new file mode 100644 +index 0000000000000000..b2d1f76ce02d629e +--- /dev/null ++++ b/sysdeps/powerpc/powerpc64/dl-funcdesc.h +@@ -0,0 +1,34 @@ ++/* PowerPC ELFv1 function descriptor definition. ++ Copyright (C) 2009-2021 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#ifndef _DL_FUNCDESC_H ++#define _DL_FUNCDESC_H ++ ++#if _CALL_ELF != 2 ++/* A PowerPC64 function descriptor. The .plt (procedure linkage ++ table) and .opd (official procedure descriptor) sections are ++ arrays of these. 
*/ ++typedef struct ++{ ++ Elf64_Addr fd_func; ++ Elf64_Addr fd_toc; ++ Elf64_Addr fd_aux; ++} Elf64_FuncDesc; ++#endif ++ ++#endif +diff --git a/sysdeps/powerpc/powerpc64/dl-irel.h b/sysdeps/powerpc/powerpc64/dl-irel.h +index 0e11b7ff647c19d5..aa9a2dca71c3b05f 100644 +--- a/sysdeps/powerpc/powerpc64/dl-irel.h ++++ b/sysdeps/powerpc/powerpc64/dl-irel.h +@@ -23,7 +23,7 @@ + #include + #include + #include +-#include ++#include + + #define ELF_MACHINE_IRELA 1 + +diff --git a/sysdeps/powerpc/powerpc64/dl-machine.h b/sysdeps/powerpc/powerpc64/dl-machine.h +index 3f92fbb369eb5023..3a4a21a4124dc72a 100644 +--- a/sysdeps/powerpc/powerpc64/dl-machine.h ++++ b/sysdeps/powerpc/powerpc64/dl-machine.h +@@ -28,23 +28,14 @@ + #include + #include + #include ++#include ++#include ++#include + + /* Translate a processor specific dynamic tag to the index + in l_info array. */ + #define DT_PPC64(x) (DT_PPC64_##x - DT_LOPROC + DT_NUM) + +-#if _CALL_ELF != 2 +-/* A PowerPC64 function descriptor. The .plt (procedure linkage +- table) and .opd (official procedure descriptor) sections are +- arrays of these. */ +-typedef struct +-{ +- Elf64_Addr fd_func; +- Elf64_Addr fd_toc; +- Elf64_Addr fd_aux; +-} Elf64_FuncDesc; +-#endif +- + #define ELF_MULT_MACHINES_SUPPORTED + + /* Return nonzero iff ELF header is compatible with the running host. */ +@@ -292,10 +283,6 @@ BODY_PREFIX "_dl_start_user:\n" \ + /* A reloc type used for ld.so cmdline arg lookups to reject PLT entries. */ + #define ELF_MACHINE_JMP_SLOT R_PPC64_JMP_SLOT + +-/* The PowerPC never uses REL relocations. */ +-#define ELF_MACHINE_NO_REL 1 +-#define ELF_MACHINE_NO_RELA 0 +- + /* We define an initialization function to initialize HWCAP/HWCAP2 and + platform data so it can be copied into the TCB later. This is called + very early in _dl_sysdep_start for dynamically linked binaries. */ +diff --git a/sysdeps/riscv/dl-machine.h b/sysdeps/riscv/dl-machine.h +index 343c0feb6b437001..a9a3f63cb4d91f26 100644 +--- a/sysdeps/riscv/dl-machine.h ++++ b/sysdeps/riscv/dl-machine.h +@@ -26,6 +26,8 @@ + #include + #include + #include ++#include ++#include + + #ifndef _RTLD_PROLOGUE + # define _RTLD_PROLOGUE(entry) \ +@@ -51,9 +53,6 @@ + || (__WORDSIZE == 64 && (type) == R_RISCV_TLS_TPREL64))) \ + | (ELF_RTYPE_CLASS_COPY * ((type) == R_RISCV_COPY))) + +-#define ELF_MACHINE_NO_REL 1 +-#define ELF_MACHINE_NO_RELA 0 +- + /* Return nonzero iff ELF header is compatible with the running host. */ + static inline int __attribute_used__ + elf_machine_matches_host (const ElfW(Ehdr) *ehdr) +diff --git a/sysdeps/s390/s390-32/dl-machine.h b/sysdeps/s390/s390-32/dl-machine.h +index 96a5e80c846c816a..ba681d1eac7bda53 100644 +--- a/sysdeps/s390/s390-32/dl-machine.h ++++ b/sysdeps/s390/s390-32/dl-machine.h +@@ -27,6 +27,8 @@ + #include + #include + #include ++#include ++#include + + /* This is an older, now obsolete value. */ + #define EM_S390_OLD 0xA390 +@@ -277,10 +279,6 @@ _dl_start_user:\n\ + /* A reloc type used for ld.so cmdline arg lookups to reject PLT entries. */ + #define ELF_MACHINE_JMP_SLOT R_390_JMP_SLOT + +-/* The S390 never uses Elf32_Rel relocations. */ +-#define ELF_MACHINE_NO_REL 1 +-#define ELF_MACHINE_NO_RELA 0 +- + /* We define an initialization functions. This is called very early in + _dl_sysdep_start. 
*/ + #define DL_PLATFORM_INIT dl_platform_init () +diff --git a/sysdeps/s390/s390-64/dl-machine.h b/sysdeps/s390/s390-64/dl-machine.h +index c94d09b9c8512738..af2cffd9f904274e 100644 +--- a/sysdeps/s390/s390-64/dl-machine.h ++++ b/sysdeps/s390/s390-64/dl-machine.h +@@ -28,6 +28,8 @@ + #include + #include + #include ++#include ++#include + + #define ELF_MACHINE_IRELATIVE R_390_IRELATIVE + +@@ -225,10 +227,6 @@ _dl_start_user:\n\ + /* A reloc type used for ld.so cmdline arg lookups to reject PLT entries. */ + #define ELF_MACHINE_JMP_SLOT R_390_JMP_SLOT + +-/* The 64 bit S/390 never uses Elf64_Rel relocations. */ +-#define ELF_MACHINE_NO_REL 1 +-#define ELF_MACHINE_NO_RELA 0 +- + /* We define an initialization functions. This is called very early in + _dl_sysdep_start. */ + #define DL_PLATFORM_INIT dl_platform_init () +diff --git a/sysdeps/sh/dl-machine.h b/sysdeps/sh/dl-machine.h +index 0c22dfd8487a516e..d14023e7492f64e9 100644 +--- a/sysdeps/sh/dl-machine.h ++++ b/sysdeps/sh/dl-machine.h +@@ -24,6 +24,8 @@ + #include + #include + #include ++#include ++#include + + /* Return nonzero iff ELF header is compatible with the running host. */ + static inline int __attribute__ ((unused)) +@@ -251,10 +253,6 @@ elf_machine_plt_value (struct link_map *map, const Elf32_Rela *reloc, + + #endif /* !dl_machine_h */ + +-/* SH never uses Elf32_Rel relocations. */ +-#define ELF_MACHINE_NO_REL 1 +-#define ELF_MACHINE_NO_RELA 0 +- + #ifdef RESOLVE_MAP + + /* Perform the relocation specified by RELOC and SYM (which is fully resolved). +diff --git a/sysdeps/sparc/sparc32/dl-machine.h b/sysdeps/sparc/sparc32/dl-machine.h +index 6361cfae9eb8fa58..78f53bc49920fa46 100644 +--- a/sysdeps/sparc/sparc32/dl-machine.h ++++ b/sysdeps/sparc/sparc32/dl-machine.h +@@ -28,6 +28,8 @@ + #include + #include + #include ++#include ++#include + + /* Return nonzero iff ELF header is compatible with the running host. */ + static inline int +@@ -196,10 +198,6 @@ elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[], + /* A reloc type used for ld.so cmdline arg lookups to reject PLT entries. */ + #define ELF_MACHINE_JMP_SLOT R_SPARC_JMP_SLOT + +-/* The SPARC never uses Elf32_Rel relocations. */ +-#define ELF_MACHINE_NO_REL 1 +-#define ELF_MACHINE_NO_RELA 0 +- + /* Undo the sub %sp, 6*4, %sp; add %sp, 22*4, %o0 below to get at the + value we want in __libc_stack_end. */ + #define DL_STACK_END(cookie) \ +diff --git a/sysdeps/sparc/sparc64/dl-machine.h b/sysdeps/sparc/sparc64/dl-machine.h +index 3fd18c6e5ef21e38..3fa79d038fd38976 100644 +--- a/sysdeps/sparc/sparc64/dl-machine.h ++++ b/sysdeps/sparc/sparc64/dl-machine.h +@@ -26,6 +26,8 @@ + #include + #include + #include ++#include ++#include + + #define ELF64_R_TYPE_ID(info) ((info) & 0xff) + #define ELF64_R_TYPE_DATA(info) ((info) >> 8) +@@ -118,10 +120,6 @@ elf_machine_plt_value (struct link_map *map, const Elf64_Rela *reloc, + /* A reloc type used for ld.so cmdline arg lookups to reject PLT entries. */ + #define ELF_MACHINE_JMP_SLOT R_SPARC_JMP_SLOT + +-/* The SPARC never uses Elf64_Rel relocations. */ +-#define ELF_MACHINE_NO_REL 1 +-#define ELF_MACHINE_NO_RELA 0 +- + /* Set up the loaded object described by L so its unrelocated PLT + entries will jump to the on-demand fixup code in dl-runtime.c. 
*/ + +diff --git a/sysdeps/unix/sysv/linux/powerpc/libc-vdso.h b/sysdeps/unix/sysv/linux/powerpc/libc-vdso.h +index db388a022d552b8c..72b75d3bebfed0b5 100644 +--- a/sysdeps/unix/sysv/linux/powerpc/libc-vdso.h ++++ b/sysdeps/unix/sysv/linux/powerpc/libc-vdso.h +@@ -24,7 +24,7 @@ + #include + + #if (defined(__PPC64__) || defined(__powerpc64__)) && _CALL_ELF != 2 +-# include ++# include + /* The correct solution is for _dl_vdso_vsym to return the address of the OPD + for the kernel VDSO function. That address would then be stored in the + __vdso_* variables and returned as the result of the IFUNC resolver function. +diff --git a/sysdeps/x86_64/dl-machine.h b/sysdeps/x86_64/dl-machine.h +index 90c77cfea1de8d63..94296719d4d9fb82 100644 +--- a/sysdeps/x86_64/dl-machine.h ++++ b/sysdeps/x86_64/dl-machine.h +@@ -27,6 +27,8 @@ + #include + #include + #include ++#include ++#include + + /* Return nonzero iff ELF header is compatible with the running host. */ + static inline int __attribute__ ((unused)) +@@ -208,10 +210,6 @@ _dl_start_user:\n\ + // XXX This is a work-around for a broken linker. Remove! + #define ELF_MACHINE_IRELATIVE R_X86_64_IRELATIVE + +-/* The x86-64 never uses Elf64_Rel/Elf32_Rel relocations. */ +-#define ELF_MACHINE_NO_REL 1 +-#define ELF_MACHINE_NO_RELA 0 +- + /* We define an initialization function. This is called very early in + _dl_sysdep_start. */ + #define DL_PLATFORM_INIT dl_platform_init () diff --git a/SOURCES/glibc-upstream-2.34-139.patch b/SOURCES/glibc-upstream-2.34-139.patch new file mode 100644 index 0000000..8633732 --- /dev/null +++ b/SOURCES/glibc-upstream-2.34-139.patch @@ -0,0 +1,221 @@ +commit f6a54a304223666ea4af73260c99c830d7726eca +Author: Adhemerval Zanella +Date: Fri Oct 15 14:35:31 2021 -0300 + + elf: Fix elf_get_dynamic_info() for bootstrap + + THe d6d89608ac8c broke powerpc for --enable-bind-now because it turned + out that different than patch assumption rtld elf_get_dynamic_info() + does require to handle RTLD_BOOTSTRAP to avoid DT_FLAGS and + DT_RUNPATH (more specially the GLRO usage which is not reallocate + yet). + + This patch fixes by passing two arguments to elf_get_dynamic_info() + to inform that by rtld (bootstrap) or static pie initialization + (static_pie_bootstrap). I think using explicit argument is way more + clear and burried C preprocessor, and compiler should remove the + dead code. + + I checked on x86_64 and i686 with default options, --enable-bind-now, + and --enable-bind-now and --enable--static-pie. I also check on + aarch64, armhf, powerpc64, and powerpc with default and + --enable-bind-now. + + (cherry picked from commit 5118dcac68c4eadfd6304bb33adde63d062dc07f) + + Resolved conflicts: + elf/rtld.c - Manual merge. + +diff --git a/elf/dl-load.c b/elf/dl-load.c +index fb3da5aa565908a6..a920b12a906a9dec 100644 +--- a/elf/dl-load.c ++++ b/elf/dl-load.c +@@ -1296,7 +1296,7 @@ _dl_map_object_from_fd (const char *name, const char *origname, int fd, + if (l->l_ld != 0) + l->l_ld = (ElfW(Dyn) *) ((ElfW(Addr)) l->l_ld + l->l_addr); + +- elf_get_dynamic_info (l); ++ elf_get_dynamic_info (l, false, false); + + /* Make sure we are not dlopen'ing an object that has the + DF_1_NOOPEN flag set, or a PIE object. 
*/ +diff --git a/elf/dl-reloc-static-pie.c b/elf/dl-reloc-static-pie.c +index ababafcf98f9945d..757205affe65d9e1 100644 +--- a/elf/dl-reloc-static-pie.c ++++ b/elf/dl-reloc-static-pie.c +@@ -25,7 +25,6 @@ + + #include + +-#define STATIC_PIE_BOOTSTRAP + #define RESOLVE_MAP(map, scope, sym, version, flags) map + #include "dynamic-link.h" + #include "get-dynamic-info.h" +@@ -52,7 +51,7 @@ _dl_relocate_static_pie (void) + break; + } + +- elf_get_dynamic_info (main_map); ++ elf_get_dynamic_info (main_map, false, true); + + # ifdef ELF_MACHINE_BEFORE_RTLD_RELOC + ELF_MACHINE_BEFORE_RTLD_RELOC (main_map, main_map->l_info); +diff --git a/elf/get-dynamic-info.h b/elf/get-dynamic-info.h +index 1ac0663d1ff5de24..f63e07dc6d2cd5e6 100644 +--- a/elf/get-dynamic-info.h ++++ b/elf/get-dynamic-info.h +@@ -26,7 +26,8 @@ + #include + + static inline void __attribute__ ((unused, always_inline)) +-elf_get_dynamic_info (struct link_map *l) ++elf_get_dynamic_info (struct link_map *l, bool bootstrap, ++ bool static_pie_bootstrap) + { + #if __ELF_NATIVE_CLASS == 32 + typedef Elf32_Word d_tag_utype; +@@ -35,7 +36,7 @@ elf_get_dynamic_info (struct link_map *l) + #endif + + #ifndef STATIC_PIE_BOOTSTRAP +- if (l->l_ld == NULL) ++ if (!bootstrap && l->l_ld == NULL) + return; + #endif + +@@ -112,47 +113,63 @@ elf_get_dynamic_info (struct link_map *l) + if (info[DT_REL] != NULL) + assert (info[DT_RELENT]->d_un.d_val == sizeof (ElfW(Rel))); + #endif +-#ifdef STATIC_PIE_BOOTSTRAP +- assert (info[DT_RUNPATH] == NULL); +- assert (info[DT_RPATH] == NULL); +-#endif +- if (info[DT_FLAGS] != NULL) ++ if (bootstrap || static_pie_bootstrap) + { +- /* Flags are used. Translate to the old form where available. +- Since these l_info entries are only tested for NULL pointers it +- is ok if they point to the DT_FLAGS entry. */ +- l->l_flags = info[DT_FLAGS]->d_un.d_val; +- +- if (l->l_flags & DF_SYMBOLIC) +- info[DT_SYMBOLIC] = info[DT_FLAGS]; +- if (l->l_flags & DF_TEXTREL) +- info[DT_TEXTREL] = info[DT_FLAGS]; +- if (l->l_flags & DF_BIND_NOW) +- info[DT_BIND_NOW] = info[DT_FLAGS]; ++ assert (info[DT_RUNPATH] == NULL); ++ assert (info[DT_RPATH] == NULL); + } +- if (info[VERSYMIDX (DT_FLAGS_1)] != NULL) ++ if (bootstrap) + { +- l->l_flags_1 = info[VERSYMIDX (DT_FLAGS_1)]->d_un.d_val; +- if (l->l_flags_1 & DF_1_NODELETE) +- l->l_nodelete_pending = true; +- +- /* Only DT_1_SUPPORTED_MASK bits are supported, and we would like +- to assert this, but we can't. Users have been setting +- unsupported DF_1_* flags for a long time and glibc has ignored +- them. Therefore to avoid breaking existing applications the +- best we can do is add a warning during debugging with the +- intent of notifying the user of the problem. */ +- if (__builtin_expect (GLRO(dl_debug_mask) & DL_DEBUG_FILES, 0) +- && l->l_flags_1 & ~DT_1_SUPPORTED_MASK) +- _dl_debug_printf ("\nWARNING: Unsupported flag value(s) of 0x%x in DT_FLAGS_1.\n", +- l->l_flags_1 & ~DT_1_SUPPORTED_MASK); +- +- if (l->l_flags_1 & DF_1_NOW) +- info[DT_BIND_NOW] = info[VERSYMIDX (DT_FLAGS_1)]; ++ /* Only the bind now flags are allowed. */ ++ assert (info[VERSYMIDX (DT_FLAGS_1)] == NULL ++ || (info[VERSYMIDX (DT_FLAGS_1)]->d_un.d_val & ~DF_1_NOW) == 0); ++ /* Flags must not be set for ld.so. */ ++ assert (info[DT_FLAGS] == NULL ++ || (info[DT_FLAGS]->d_un.d_val & ~DF_BIND_NOW) == 0); + } +- if (info[DT_RUNPATH] != NULL) +- /* If both RUNPATH and RPATH are given, the latter is ignored. */ +- info[DT_RPATH] = NULL; ++ else ++ { ++ if (info[DT_FLAGS] != NULL) ++ { ++ /* Flags are used. 
Translate to the old form where available. ++ Since these l_info entries are only tested for NULL pointers it ++ is ok if they point to the DT_FLAGS entry. */ ++ l->l_flags = info[DT_FLAGS]->d_un.d_val; ++ ++ if (l->l_flags & DF_SYMBOLIC) ++ info[DT_SYMBOLIC] = info[DT_FLAGS]; ++ if (l->l_flags & DF_TEXTREL) ++ info[DT_TEXTREL] = info[DT_FLAGS]; ++ if (l->l_flags & DF_BIND_NOW) ++ info[DT_BIND_NOW] = info[DT_FLAGS]; ++ } ++ ++ if (info[VERSYMIDX (DT_FLAGS_1)] != NULL) ++ { ++ l->l_flags_1 = info[VERSYMIDX (DT_FLAGS_1)]->d_un.d_val; ++ if (l->l_flags_1 & DF_1_NODELETE) ++ l->l_nodelete_pending = true; ++ ++ /* Only DT_1_SUPPORTED_MASK bits are supported, and we would like ++ to assert this, but we can't. Users have been setting ++ unsupported DF_1_* flags for a long time and glibc has ignored ++ them. Therefore to avoid breaking existing applications the ++ best we can do is add a warning during debugging with the ++ intent of notifying the user of the problem. */ ++ if (__builtin_expect (GLRO(dl_debug_mask) & DL_DEBUG_FILES, 0) ++ && l->l_flags_1 & ~DT_1_SUPPORTED_MASK) ++ _dl_debug_printf ("\nWARNING: Unsupported flag value(s) of 0x%x " ++ "in DT_FLAGS_1.\n", ++ l->l_flags_1 & ~DT_1_SUPPORTED_MASK); ++ ++ if (l->l_flags_1 & DF_1_NOW) ++ info[DT_BIND_NOW] = info[VERSYMIDX (DT_FLAGS_1)]; ++ } ++ ++ if (info[DT_RUNPATH] != NULL) ++ /* If both RUNPATH and RPATH are given, the latter is ignored. */ ++ info[DT_RPATH] = NULL; ++ } + } + + #endif +diff --git a/elf/rtld.c b/elf/rtld.c +index 37d28d5a66d7b5d6..ad5ddb2a0ab94e7f 100644 +--- a/elf/rtld.c ++++ b/elf/rtld.c +@@ -549,7 +549,7 @@ _dl_start (void *arg) + /* Read our own dynamic section and fill in the info array. */ + bootstrap_map.l_ld = (void *) bootstrap_map.l_addr + elf_machine_dynamic (); + bootstrap_map.l_ld_readonly = DL_RO_DYN_SECTION; +- elf_get_dynamic_info (&bootstrap_map); ++ elf_get_dynamic_info (&bootstrap_map, true, false); + + #if NO_TLS_OFFSET != 0 + bootstrap_map.l_tls_offset = NO_TLS_OFFSET; +@@ -1653,7 +1653,7 @@ dl_main (const ElfW(Phdr) *phdr, + if (! rtld_is_main) + { + /* Extract the contents of the dynamic section for easy access. */ +- elf_get_dynamic_info (main_map); ++ elf_get_dynamic_info (main_map, false, false); + + /* If the main map is libc.so, update the base namespace to + refer to this map. If libc.so is loaded later, this happens +diff --git a/elf/setup-vdso.h b/elf/setup-vdso.h +index f44748bc9858e5fd..3f20578046de76ed 100644 +--- a/elf/setup-vdso.h ++++ b/elf/setup-vdso.h +@@ -64,7 +64,7 @@ setup_vdso (struct link_map *main_map __attribute__ ((unused)), + l->l_map_end += l->l_addr; + l->l_text_end += l->l_addr; + l->l_ld = (void *) ((ElfW(Addr)) l->l_ld + l->l_addr); +- elf_get_dynamic_info (l); ++ elf_get_dynamic_info (l, false, false); + _dl_setup_hash (l); + l->l_relocated = 1; + diff --git a/SOURCES/glibc-upstream-2.34-140.patch b/SOURCES/glibc-upstream-2.34-140.patch new file mode 100644 index 0000000..69ab10b --- /dev/null +++ b/SOURCES/glibc-upstream-2.34-140.patch @@ -0,0 +1,69 @@ +commit a31bbe3242266aaea423e5879f38aed69aea1d5e +Author: Adhemerval Zanella +Date: Thu Jul 29 11:13:57 2021 -0300 + + elf: Move LAV_CURRENT to link_lavcurrent.h + + No functional change. 
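+
+    For context, LAV_CURRENT is the value an audit module reports back
+    from its la_version handshake; a minimal module (sketch) is:
+
+        #include <link.h>  /* Now provides LAV_CURRENT via
+                              <bits/link_lavcurrent.h>.  */
+
+        unsigned int
+        la_version (unsigned int version)
+        {
+          return LAV_CURRENT;
+        }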
+
+    (cherry picked from commit 54816ae98d57930b7c945f17485714a5574bfe47)
+
+    Resolved conflicts:
+	elf/Makefile
+
+diff --git a/bits/link_lavcurrent.h b/bits/link_lavcurrent.h
+new file mode 100644
+index 0000000000000000..44fbea1e8060997f
+--- /dev/null
++++ b/bits/link_lavcurrent.h
+@@ -0,0 +1,25 @@
++/* Data structure for communication from the run-time dynamic linker for
++   loaded ELF shared objects.  LAV_CURRENT definition.
++   Copyright (C) 2021 Free Software Foundation, Inc.
++   This file is part of the GNU C Library.
++
++   The GNU C Library is free software; you can redistribute it and/or
++   modify it under the terms of the GNU Lesser General Public
++   License as published by the Free Software Foundation; either
++   version 2.1 of the License, or (at your option) any later version.
++
++   The GNU C Library is distributed in the hope that it will be useful,
++   but WITHOUT ANY WARRANTY; without even the implied warranty of
++   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++   Lesser General Public License for more details.
++
++   You should have received a copy of the GNU Lesser General Public
++   License along with the GNU C Library; if not, see
++   .  */
++
++#ifndef _LINK_H
++# error "Never include directly; use instead."
++#endif
++
++/* Version numbers for la_version handshake interface.  */
++#define LAV_CURRENT 1
+diff --git a/elf/Makefile b/elf/Makefile
+index cd8725c76f4cfb48..7fa80946ff3aae42 100644
+--- a/elf/Makefile
++++ b/elf/Makefile
+@@ -24,6 +24,7 @@ include ../Makeconfig
+ headers = \
+   bits/elfclass.h \
+   bits/link.h \
++  bits/link_lavcurrent.h \
+   elf.h \
+   link.h \
+   # headers
+diff --git a/elf/link.h b/elf/link.h
+index ff3a85c847930b9b..21a351686b9bf7c8 100644
+--- a/elf/link.h
++++ b/elf/link.h
+@@ -96,7 +96,7 @@ struct link_map
+ #ifdef __USE_GNU
+
+ /* Version numbers for la_version handshake interface.  */
+-#define LAV_CURRENT 1
++#include
+
+ /* Activity types signaled through la_activity.  */
+ enum
diff --git a/SOURCES/glibc-upstream-2.34-141.patch b/SOURCES/glibc-upstream-2.34-141.patch
new file mode 100644
index 0000000..2856c96
--- /dev/null
+++ b/SOURCES/glibc-upstream-2.34-141.patch
@@ -0,0 +1,390 @@
+commit e25fe992132c460fecc1ab9fade185d5dd3f91ff
+Author: Adhemerval Zanella
+Date: Thu Nov 11 09:28:21 2021 -0300
+
+    elf: Move la_activity (LA_ACT_ADD) after _dl_add_to_namespace_list() (BZ #28062)
+
+    It ensures that the namespace is guaranteed not to be empty.
+
+    Checked on x86_64-linux-gnu.
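+
+    With this ordering, an la_activity hook can rely on the namespace
+    being non-empty when it receives LA_ACT_ADD.  A sketch (the fprintf
+    body is illustrative only):
+
+        #include <link.h>
+        #include <stdio.h>
+
+        void
+        la_activity (uintptr_t *cookie, unsigned int flag)
+        {
+          /* COOKIE identifies the head of the affected namespace; after
+             this change it already lists the objects being added.  */
+          if (flag == LA_ACT_ADD)
+            fprintf (stderr, "LA_ACT_ADD\n");
+        }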
+ + Reviewed-by: Florian Weimer + (cherry picked from commit ed3ce71f5c64c5f07cbde0ef03554ea8950d8f2c) + + Resolved conflicts: + elf/Makefile + +diff --git a/elf/Makefile b/elf/Makefile +index 7fa80946ff3aae42..bf6da98bdd15a18d 100644 +--- a/elf/Makefile ++++ b/elf/Makefile +@@ -363,6 +363,7 @@ tests += \ + tst-audit15 \ + tst-audit16 \ + tst-audit17 \ ++ tst-audit18 \ + tst-auditmany \ + tst-auxobj \ + tst-auxobj-dlopen \ +@@ -623,6 +624,7 @@ modules-names = \ + tst-audit12mod2 \ + tst-audit12mod3 \ + tst-audit13mod1 \ ++ tst-audit18mod \ + tst-auditlogmod-1 \ + tst-auditlogmod-2 \ + tst-auditlogmod-3 \ +@@ -640,6 +642,7 @@ modules-names = \ + tst-auditmod9b \ + tst-auditmod11 \ + tst-auditmod12 \ ++ tst-auditmod18 \ + tst-auxvalmod \ + tst-big-note-lib \ + tst-deep1mod1 \ +@@ -1999,6 +2002,10 @@ $(objpfx)tst-auditmod17.so: $(objpfx)tst-auditmod17.os + CFLAGS-.os += $(call elide-stack-protector,.os,tst-auditmod17) + tst-audit17-ENV = LD_AUDIT=$(objpfx)tst-auditmod17.so + ++$(objpfx)tst-audit18.out: $(objpfx)tst-auditmod18.so \ ++ $(objpfx)tst-audit18mod.so ++tst-audit18-ARGS = -- $(host-test-program-cmd) ++ + # tst-sonamemove links against an older implementation of the library. + LDFLAGS-tst-sonamemove-linkmod1.so = \ + -Wl,--version-script=tst-sonamemove-linkmod1.map \ +diff --git a/elf/dl-load.c b/elf/dl-load.c +index a920b12a906a9dec..a8c6df3959f2b331 100644 +--- a/elf/dl-load.c ++++ b/elf/dl-load.c +@@ -1054,42 +1054,6 @@ _dl_map_object_from_fd (const char *name, const char *origname, int fd, + /* This is the ELF header. We read it in `open_verify'. */ + header = (void *) fbp->buf; + +- /* Signal that we are going to add new objects. */ +- if (r->r_state == RT_CONSISTENT) +- { +-#ifdef SHARED +- /* Auditing checkpoint: we are going to add new objects. */ +- if ((mode & __RTLD_AUDIT) == 0 +- && __glibc_unlikely (GLRO(dl_naudit) > 0)) +- { +- struct link_map *head = GL(dl_ns)[nsid]._ns_loaded; +- /* Do not call the functions for any auditing object. */ +- if (head->l_auditing == 0) +- { +- struct audit_ifaces *afct = GLRO(dl_audit); +- for (unsigned int cnt = 0; cnt < GLRO(dl_naudit); ++cnt) +- { +- if (afct->activity != NULL) +- afct->activity (&link_map_audit_state (head, cnt)->cookie, +- LA_ACT_ADD); +- +- afct = afct->next; +- } +- } +- } +-#endif +- +- /* Notify the debugger we have added some objects. We need to +- call _dl_debug_initialize in a static program in case dynamic +- linking has not been used before. */ +- r->r_state = RT_ADD; +- _dl_debug_state (); +- LIBC_PROBE (map_start, 2, nsid, r); +- make_consistent = true; +- } +- else +- assert (r->r_state == RT_ADD); +- + /* Enter the new object in the list of loaded objects. */ + l = _dl_new_object (realname, name, l_type, loader, mode, nsid); + if (__glibc_unlikely (l == NULL)) +@@ -1511,6 +1475,44 @@ cannot enable executable stack as shared object requires"); + /* Now that the object is fully initialized add it to the object list. */ + _dl_add_to_namespace_list (l, nsid); + ++ /* Signal that we are going to add new objects. */ ++ if (r->r_state == RT_CONSISTENT) ++ { ++#ifdef SHARED ++ /* Auditing checkpoint: we are going to add new objects. Since this ++ is called after _dl_add_to_namespace_list the namespace is guaranteed ++ to not be empty. */ ++ if ((mode & __RTLD_AUDIT) == 0 ++ && __glibc_unlikely (GLRO(dl_naudit) > 0)) ++ { ++ struct link_map *head = GL(dl_ns)[nsid]._ns_loaded; ++ /* Do not call the functions for any auditing object. 
*/ ++ if (head->l_auditing == 0) ++ { ++ struct audit_ifaces *afct = GLRO(dl_audit); ++ for (unsigned int cnt = 0; cnt < GLRO(dl_naudit); ++cnt) ++ { ++ if (afct->activity != NULL) ++ afct->activity (&link_map_audit_state (head, cnt)->cookie, ++ LA_ACT_ADD); ++ ++ afct = afct->next; ++ } ++ } ++ } ++#endif ++ ++ /* Notify the debugger we have added some objects. We need to ++ call _dl_debug_initialize in a static program in case dynamic ++ linking has not been used before. */ ++ r->r_state = RT_ADD; ++ _dl_debug_state (); ++ LIBC_PROBE (map_start, 2, nsid, r); ++ make_consistent = true; ++ } ++ else ++ assert (r->r_state == RT_ADD); ++ + #ifdef SHARED + /* Auditing checkpoint: we have a new object. */ + if (__glibc_unlikely (GLRO(dl_naudit) > 0) +diff --git a/elf/tst-audit18.c b/elf/tst-audit18.c +new file mode 100644 +index 0000000000000000..ef784908f60d50aa +--- /dev/null ++++ b/elf/tst-audit18.c +@@ -0,0 +1,129 @@ ++/* Check DT_AUDIT with dlmopen. ++ Copyright (C) 2021 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++static int restart; ++#define CMDLINE_OPTIONS \ ++ { "restart", no_argument, &restart, 1 }, ++ ++static int ++handle_restart (void) ++{ ++ { ++ void *h = xdlmopen (LM_ID_NEWLM, LIBC_SO, RTLD_NOW); ++ ++ pid_t (*s) (void) = xdlsym (h, "getpid"); ++ TEST_COMPARE (s (), getpid ()); ++ ++ xdlclose (h); ++ } ++ ++ { ++ void *h = xdlmopen (LM_ID_NEWLM, "tst-audit18mod.so", RTLD_NOW); ++ ++ int (*foo) (void) = xdlsym (h, "foo"); ++ TEST_COMPARE (foo (), 10); ++ ++ xdlclose (h); ++ } ++ ++ return 0; ++} ++ ++static int ++do_test (int argc, char *argv[]) ++{ ++ /* We must have either: ++ - One our fource parameters left if called initially: ++ + path to ld.so optional ++ + "--library-path" optional ++ + the library path optional ++ + the application name */ ++ ++ if (restart) ++ return handle_restart (); ++ ++ char *spargv[9]; ++ int i = 0; ++ for (; i < argc - 1; i++) ++ spargv[i] = argv[i + 1]; ++ spargv[i++] = (char *) "--direct"; ++ spargv[i++] = (char *) "--restart"; ++ spargv[i] = NULL; ++ ++ setenv ("LD_AUDIT", "tst-auditmod18.so", 0); ++ struct support_capture_subprocess result ++ = support_capture_subprogram (spargv[0], spargv); ++ support_capture_subprocess_check (&result, "tst-audit18", 0, sc_allow_stderr); ++ ++ struct ++ { ++ const char *name; ++ bool found; ++ } audit_iface[] = ++ { ++ { "la_version", false }, ++ { "la_objsearch", false }, ++ { "la_activity", false }, ++ { "la_objopen", false }, ++ { "la_objclose", false }, ++ { "la_preinit", false }, ++#if __WORDSIZE == 32 ++ { "la_symbind32", false }, ++#elif __WORDSIZE == 64 ++ { "la_symbind64", false }, ++#endif ++ }; ++ ++ /* Some hooks are called more than once but the test only check if any ++ is called at least 
+diff --git a/elf/tst-audit18mod.c b/elf/tst-audit18mod.c +new file mode 100644 +index 0000000000000000..096a9167c9f8353f +--- /dev/null ++++ b/elf/tst-audit18mod.c +@@ -0,0 +1,23 @@ ++/* Check DT_AUDIT with dlmopen. ++ Copyright (C) 2021 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ <https://www.gnu.org/licenses/>. */ ++ ++int ++foo (void) ++{ ++ return 10; ++} +diff --git a/elf/tst-auditmod18.c b/elf/tst-auditmod18.c +new file mode 100644 +index 0000000000000000..182992e9fdb1620c +--- /dev/null ++++ b/elf/tst-auditmod18.c +@@ -0,0 +1,73 @@ ++/* Check DT_AUDIT with dlmopen. ++ Copyright (C) 2021 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ <https://www.gnu.org/licenses/>.
*/ ++ ++#include <link.h> ++#include <stdio.h> ++ ++unsigned int ++la_version (unsigned int version) ++{ ++ fprintf (stderr, "%s\n", __func__); ++ return LAV_CURRENT; ++} ++ ++char * ++la_objsearch (const char *name, uintptr_t *cookie, unsigned int flag) ++{ ++ fprintf (stderr, "%s\n", __func__); ++ return (char *) name; ++} ++ ++void ++la_activity (uintptr_t *cookie, unsigned int flag) ++{ ++ fprintf (stderr, "%s\n", __func__); ++} ++ ++unsigned int ++la_objopen (struct link_map *map, Lmid_t lmid, uintptr_t *cookie) ++{ ++ fprintf (stderr, "%s\n", __func__); ++ return LA_FLG_BINDTO | LA_FLG_BINDFROM; ++} ++ ++unsigned int ++la_objclose (uintptr_t *cookie) ++{ ++ fprintf (stderr, "%s\n", __func__); ++ return 0; ++} ++ ++void ++la_preinit (uintptr_t *cookie) ++{ ++ fprintf (stderr, "%s\n", __func__); ++} ++ ++uintptr_t ++#if __ELF_NATIVE_CLASS == 32 ++la_symbind32 (Elf32_Sym *sym, unsigned int ndx, uintptr_t *refcook, ++ uintptr_t *defcook, unsigned int *flags, const char *symname) ++#else ++la_symbind64 (Elf64_Sym *sym, unsigned int ndx, uintptr_t *refcook, ++ uintptr_t *defcook, unsigned int *flags, const char *symname) ++#endif ++{ ++ fprintf (stderr, "%s\n", __func__); ++ return sym->st_value; ++}
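A note on why do_test re-executes itself: LD_AUDIT is consulted by the dynamic linker only at process startup, so an audit module cannot be activated from within an already-running process. A rough standalone equivalent of the spawn step, with placeholder paths (the module and target names are assumptions for illustration):

    #define _GNU_SOURCE
    #include <spawn.h>
    #include <stdlib.h>
    #include <sys/wait.h>
    #include <unistd.h>

    extern char **environ;

    int
    main (void)
    {
      /* Must be set before the child's dynamic linker runs.  */
      setenv ("LD_AUDIT", "./tst-auditmod18.so", 1);
      char *argv[] = { (char *) "./target-program", NULL };
      pid_t pid;
      if (posix_spawn (&pid, argv[0], NULL, NULL, argv, environ) != 0)
        return 1;
      int status;
      waitpid (pid, &status, 0);
      return WIFEXITED (status) ? WEXITSTATUS (status) : 1;
    }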
diff --git a/SOURCES/glibc-upstream-2.34-142.patch b/SOURCES/glibc-upstream-2.34-142.patch new file mode 100644 index 0000000..20e72f1 --- /dev/null +++ b/SOURCES/glibc-upstream-2.34-142.patch @@ -0,0 +1,159 @@ +commit ce0cb6d1d2daac2d58006a41c3d19c551b86f255 +Author: Adhemerval Zanella +Date: Mon Jul 19 15:47:51 2021 -0300 + + elf: Add _dl_audit_objopen + + It consolidates the code required to call la_objopen audit callback. + + Checked on x86_64-linux-gnu, i686-linux-gnu, and aarch64-linux-gnu. + + Reviewed-by: Florian Weimer + (cherry picked from commit aee6e90f93e285016b6cd9c8bd00402c19ba271b) + + Resolved conflicts: + elf/Makefile + +diff --git a/elf/Makefile b/elf/Makefile +index bf6da98bdd15a18d..85165c0591412a45 100644 +--- a/elf/Makefile ++++ b/elf/Makefile +@@ -118,6 +118,7 @@ elide-routines.os = \ + # interpreter and operating independent of libc. + rtld-routines = \ + $(all-dl-routines) \ ++ dl-audit \ + dl-compat \ + dl-conflict \ + dl-diagnostics \ +diff --git a/elf/dl-audit.c b/elf/dl-audit.c +new file mode 100644 +index 0000000000000000..4066dfe85146b9d4 +--- /dev/null ++++ b/elf/dl-audit.c +@@ -0,0 +1,39 @@ ++/* Audit common functions. ++ Copyright (C) 2021 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ <https://www.gnu.org/licenses/>. */ ++ ++#include <ldsodefs.h> ++ ++void ++_dl_audit_objopen (struct link_map *l, Lmid_t nsid) ++{ ++ if (__glibc_likely (GLRO(dl_naudit) == 0)) ++ return; ++ ++ struct audit_ifaces *afct = GLRO(dl_audit); ++ for (unsigned int cnt = 0; cnt < GLRO(dl_naudit); ++cnt) ++ { ++ if (afct->objopen != NULL) ++ { ++ struct auditstate *state = link_map_audit_state (l, cnt); ++ state->bindflags = afct->objopen (l, nsid, &state->cookie); ++ l->l_audit_any_plt |= state->bindflags != 0; ++ } ++ ++ afct = afct->next; ++ } ++} +diff --git a/elf/dl-load.c b/elf/dl-load.c +index a8c6df3959f2b331..a2d73d025c65cd79 100644 +--- a/elf/dl-load.c ++++ b/elf/dl-load.c +@@ -1515,22 +1515,8 @@ cannot enable executable stack as shared object requires"); + + #ifdef SHARED + /* Auditing checkpoint: we have a new object. */ +- if (__glibc_unlikely (GLRO(dl_naudit) > 0) +- && !GL(dl_ns)[l->l_ns]._ns_loaded->l_auditing) +- { +- struct audit_ifaces *afct = GLRO(dl_audit); +- for (unsigned int cnt = 0; cnt < GLRO(dl_naudit); ++cnt) +- { +- if (afct->objopen != NULL) +- { +- struct auditstate *state = link_map_audit_state (l, cnt); +- state->bindflags = afct->objopen (l, nsid, &state->cookie); +- l->l_audit_any_plt |= state->bindflags != 0; +- } +- +- afct = afct->next; +- } +- } ++ if (!GL(dl_ns)[l->l_ns]._ns_loaded->l_auditing) ++ _dl_audit_objopen (l, nsid); + #endif + + return l; +diff --git a/elf/rtld.c b/elf/rtld.c +index ad5ddb2a0ab94e7f..45fec0df3043b90a 100644 +--- a/elf/rtld.c ++++ b/elf/rtld.c +@@ -1064,25 +1064,6 @@ ERROR: audit interface '%s' requires version %d (maximum supported version %d); + dlmargs.map->l_auditing = 1; + } + +-/* Notify the the audit modules that the object MAP has already been +- loaded. */ +-static void +-notify_audit_modules_of_loaded_object (struct link_map *map) +-{ +- struct audit_ifaces *afct = GLRO(dl_audit); +- for (unsigned int cnt = 0; cnt < GLRO(dl_naudit); ++cnt) +- { +- if (afct->objopen != NULL) +- { +- struct auditstate *state = link_map_audit_state (map, cnt); +- state->bindflags = afct->objopen (map, LM_ID_BASE, &state->cookie); +- map->l_audit_any_plt |= state->bindflags != 0; +- } +- +- afct = afct->next; +- } +-} +- + /* Load all audit modules. */ + static void + load_audit_modules (struct link_map *main_map, struct audit_list *audit_list) +@@ -1101,8 +1082,8 @@ load_audit_modules (struct link_map *main_map, struct audit_list *audit_list) + program and the dynamic linker itself). */ + if (GLRO(dl_naudit) > 0) + { +- notify_audit_modules_of_loaded_object (main_map); +- notify_audit_modules_of_loaded_object (&GL(dl_rtld_map)); ++ _dl_audit_objopen (main_map, LM_ID_BASE); ++ _dl_audit_objopen (&GL(dl_rtld_map), LM_ID_BASE); + } + } + +diff --git a/sysdeps/generic/ldsodefs.h b/sysdeps/generic/ldsodefs.h +index bcf1f199c5985c65..5709e4e48dff4355 100644 +--- a/sysdeps/generic/ldsodefs.h ++++ b/sysdeps/generic/ldsodefs.h +@@ -1372,6 +1372,11 @@ link_map_audit_state (struct link_map *l, size_t index) + return &base[index]; + } + } ++ ++/* Call the la_objopen from the audit modules for the link_map L on the ++ namespace identification NSID. */ ++void _dl_audit_objopen (struct link_map *l, Lmid_t nsid) ++ attribute_hidden; + #endif /* SHARED */ + + #if PTHREAD_IN_LIBC && defined SHARED
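The new _dl_audit_objopen above drives the la_objopen hook of every registered audit module and records the returned bind flags. For reference, a module-side sketch of that hook (the logging is illustrative; the signature matches tst-auditmod18.c earlier in this series):

    #include <link.h>
    #include <stdio.h>

    unsigned int
    la_version (unsigned int version)
    {
      return LAV_CURRENT;
    }

    unsigned int
    la_objopen (struct link_map *map, Lmid_t lmid, uintptr_t *cookie)
    {
      fprintf (stderr, "objopen: %s (lmid %ld)\n",
               map->l_name[0] != '\0' ? map->l_name : "(main or rtld)",
               (long) lmid);
      /* The return value ends up in state->bindflags; returning 0 opts
         this object out of la_symbind notifications.  */
      return LA_FLG_BINDTO | LA_FLG_BINDFROM;
    }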
diff --git a/SOURCES/glibc-upstream-2.34-143.patch b/SOURCES/glibc-upstream-2.34-143.patch new file mode 100644 index 0000000..d93c0cb --- /dev/null +++ b/SOURCES/glibc-upstream-2.34-143.patch @@ -0,0 +1,254 @@ +commit 66e9d27a090874ab93030a908eb86fc29f856151 +Author: Adhemerval Zanella +Date: Tue Jul 20 11:03:34 2021 -0300 + + elf: Add _dl_audit_activity_map and _dl_audit_activity_nsid + + It consolidates the code required to call la_activity audit + callback. + + Also, for a new Lmid_t the namespace link_map list is empty, so it + requires a check before using it. This can happen when an audit + module is used along with dlmopen. + + Checked on x86_64-linux-gnu, i686-linux-gnu, and aarch64-linux-gnu. + + Reviewed-by: Florian Weimer + (cherry picked from commit 3dac3959a5cb585b065cef2cb8a8d909c907e202) + +diff --git a/elf/dl-audit.c b/elf/dl-audit.c +index 4066dfe85146b9d4..74b87f4b39be75e1 100644 +--- a/elf/dl-audit.c ++++ b/elf/dl-audit.c +@@ -18,6 +18,32 @@ + + #include <ldsodefs.h> + ++void ++_dl_audit_activity_map (struct link_map *l, int action) ++{ ++ struct audit_ifaces *afct = GLRO(dl_audit); ++ for (unsigned int cnt = 0; cnt < GLRO(dl_naudit); ++cnt) ++ { ++ if (afct->activity != NULL) ++ afct->activity (&link_map_audit_state (l, cnt)->cookie, action); ++ afct = afct->next; ++ } ++} ++ ++void ++_dl_audit_activity_nsid (Lmid_t nsid, int action) ++{ ++ /* If head is NULL, the namespace has become empty, and the audit interface ++ does not give us a way to signal LA_ACT_CONSISTENT for it because the ++ first loaded module is used to identify the namespace. */ ++ struct link_map *head = GL(dl_ns)[nsid]._ns_loaded; ++ if (__glibc_likely (GLRO(dl_naudit) == 0) ++ || head == NULL || head->l_auditing) ++ return; ++ ++ _dl_audit_activity_map (head, action); ++} ++ + void + _dl_audit_objopen (struct link_map *l, Lmid_t nsid) + { +diff --git a/elf/dl-close.c b/elf/dl-close.c +index f6fbf9de7d78555b..5a8cc9e7cb5186cc 100644 +--- a/elf/dl-close.c ++++ b/elf/dl-close.c +@@ -472,25 +472,7 @@ _dl_close_worker (struct link_map *map, bool force) + + #ifdef SHARED + /* Auditing checkpoint: we will start deleting objects. */ +- if (__glibc_unlikely (do_audit)) +- { +- struct link_map *head = ns->_ns_loaded; +- struct audit_ifaces *afct = GLRO(dl_audit); +- /* Do not call the functions for any auditing object. */ +- if (head->l_auditing == 0) +- { +- for (unsigned int cnt = 0; cnt < GLRO(dl_naudit); ++cnt) +- { +- if (afct->activity != NULL) +- { +- struct auditstate *state = link_map_audit_state (head, cnt); +- afct->activity (&state->cookie, LA_ACT_DELETE); +- } +- +- afct = afct->next; +- } +- } +- } ++ _dl_audit_activity_nsid (nsid, LA_ACT_DELETE); + #endif + + /* Notify the debugger we are about to remove some loaded objects. */ +@@ -785,32 +767,9 @@ _dl_close_worker (struct link_map *map, bool force) + __rtld_lock_unlock_recursive (GL(dl_load_tls_lock)); + + #ifdef SHARED +- /* Auditing checkpoint: we have deleted all objects. */ +- if (__glibc_unlikely (do_audit)) +- { +- struct link_map *head = ns->_ns_loaded; +- /* If head is NULL, the namespace has become empty, and the +- audit interface does not give us a way to signal +- LA_ACT_CONSISTENT for it because the first loaded module is +- used to identify the namespace. +- +- Furthermore, do not notify auditors of the cleanup of a +- failed audit module loading attempt.
*/ +- if (head != NULL && head->l_auditing == 0) +- { +- struct audit_ifaces *afct = GLRO(dl_audit); +- for (unsigned int cnt = 0; cnt < GLRO(dl_naudit); ++cnt) +- { +- if (afct->activity != NULL) +- { +- struct auditstate *state = link_map_audit_state (head, cnt); +- afct->activity (&state->cookie, LA_ACT_CONSISTENT); +- } +- +- afct = afct->next; +- } +- } +- } ++ /* Auditing checkpoint: we have deleted all objects. Also, do not notify ++ auditors of the cleanup of a failed audit module loading attempt. */ ++ _dl_audit_activity_nsid (nsid, LA_ACT_CONSISTENT); + #endif + + if (__builtin_expect (ns->_ns_loaded == NULL, 0) +diff --git a/elf/dl-load.c b/elf/dl-load.c +index a2d73d025c65cd79..baf0a926053deaed 100644 +--- a/elf/dl-load.c ++++ b/elf/dl-load.c +@@ -1482,24 +1482,8 @@ cannot enable executable stack as shared object requires"); + /* Auditing checkpoint: we are going to add new objects. Since this + is called after _dl_add_to_namespace_list the namespace is guaranteed + to not be empty. */ +- if ((mode & __RTLD_AUDIT) == 0 +- && __glibc_unlikely (GLRO(dl_naudit) > 0)) +- { +- struct link_map *head = GL(dl_ns)[nsid]._ns_loaded; +- /* Do not call the functions for any auditing object. */ +- if (head->l_auditing == 0) +- { +- struct audit_ifaces *afct = GLRO(dl_audit); +- for (unsigned int cnt = 0; cnt < GLRO(dl_naudit); ++cnt) +- { +- if (afct->activity != NULL) +- afct->activity (&link_map_audit_state (head, cnt)->cookie, +- LA_ACT_ADD); +- +- afct = afct->next; +- } +- } +- } ++ if ((mode & __RTLD_AUDIT) == 0) ++ _dl_audit_activity_nsid (nsid, LA_ACT_ADD); + #endif + + /* Notify the debugger we have added some objects. We need to +diff --git a/elf/dl-open.c b/elf/dl-open.c +index bc68e2c376debd71..3f01aa480730da13 100644 +--- a/elf/dl-open.c ++++ b/elf/dl-open.c +@@ -611,25 +611,7 @@ dl_open_worker_begin (void *a) + + #ifdef SHARED + /* Auditing checkpoint: we have added all objects. */ +- if (__glibc_unlikely (GLRO(dl_naudit) > 0)) +- { +- struct link_map *head = GL(dl_ns)[new->l_ns]._ns_loaded; +- /* Do not call the functions for any auditing object. */ +- if (head->l_auditing == 0) +- { +- struct audit_ifaces *afct = GLRO(dl_audit); +- for (unsigned int cnt = 0; cnt < GLRO(dl_naudit); ++cnt) +- { +- if (afct->activity != NULL) +- { +- struct auditstate *state = link_map_audit_state (head, cnt); +- afct->activity (&state->cookie, LA_ACT_CONSISTENT); +- } +- +- afct = afct->next; +- } +- } +- } ++ _dl_audit_activity_nsid (new->l_ns, LA_ACT_CONSISTENT); + #endif + + /* Notify the debugger all new objects are now ready to go. */ +diff --git a/elf/rtld.c b/elf/rtld.c +index 45fec0df3043b90a..b6bb46ca97b7972f 100644 +--- a/elf/rtld.c ++++ b/elf/rtld.c +@@ -1804,18 +1804,7 @@ dl_main (const ElfW(Phdr) *phdr, + + /* Auditing checkpoint: we are ready to signal that the initial map + is being constructed. */ +- if (__glibc_unlikely (GLRO(dl_naudit) > 0)) +- { +- struct audit_ifaces *afct = GLRO(dl_audit); +- for (unsigned int cnt = 0; cnt < GLRO(dl_naudit); ++cnt) +- { +- if (afct->activity != NULL) +- afct->activity (&link_map_audit_state (main_map, cnt)->cookie, +- LA_ACT_ADD); +- +- afct = afct->next; +- } +- } ++ _dl_audit_activity_map (main_map, LA_ACT_ADD); + + /* We have two ways to specify objects to preload: via environment + variable and via the file /etc/ld.so.preload. The latter can also +@@ -2496,23 +2485,7 @@ dl_main (const ElfW(Phdr) *phdr, + + #ifdef SHARED + /* Auditing checkpoint: we have added all objects. 
*/ +- if (__glibc_unlikely (GLRO(dl_naudit) > 0)) +- { +- struct link_map *head = GL(dl_ns)[LM_ID_BASE]._ns_loaded; +- /* Do not call the functions for any auditing object. */ +- if (head->l_auditing == 0) +- { +- struct audit_ifaces *afct = GLRO(dl_audit); +- for (unsigned int cnt = 0; cnt < GLRO(dl_naudit); ++cnt) +- { +- if (afct->activity != NULL) +- afct->activity (&link_map_audit_state (head, cnt)->cookie, +- LA_ACT_CONSISTENT); +- +- afct = afct->next; +- } +- } +- } ++ _dl_audit_activity_nsid (LM_ID_BASE, LA_ACT_CONSISTENT); + #endif + + /* Notify the debugger all new objects are now ready to go. We must re-get +diff --git a/sysdeps/generic/ldsodefs.h b/sysdeps/generic/ldsodefs.h +index 5709e4e48dff4355..7384abcf5e0e8e24 100644 +--- a/sysdeps/generic/ldsodefs.h ++++ b/sysdeps/generic/ldsodefs.h +@@ -1373,6 +1373,16 @@ link_map_audit_state (struct link_map *l, size_t index) + } + } + ++/* Call the la_activity from the audit modules from the link map L and issues ++ the ACTION argument. */ ++void _dl_audit_activity_map (struct link_map *l, int action) ++ attribute_hidden; ++ ++/* Call the la_activity from the audit modules from the link map from the ++ namespace NSID and issues the ACTION argument. */ ++void _dl_audit_activity_nsid (Lmid_t nsid, int action) ++ attribute_hidden; ++ + /* Call the la_objopen from the audit modules for the link_map L on the + namespace identification NSID. */ + void _dl_audit_objopen (struct link_map *l, Lmid_t nsid)
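_dl_audit_activity_map and _dl_audit_activity_nsid report the namespace transitions defined by <link.h>. A module-side sketch of the corresponding hook, showing the three states the consolidated code can deliver:

    #include <link.h>
    #include <stdio.h>

    unsigned int
    la_version (unsigned int version)
    {
      return LAV_CURRENT;
    }

    void
    la_activity (uintptr_t *cookie, unsigned int flag)
    {
      switch (flag)
        {
        case LA_ACT_ADD:         /* Objects are being added.  */
          fputs ("activity: add\n", stderr);
          break;
        case LA_ACT_DELETE:      /* Objects are being removed.  */
          fputs ("activity: delete\n", stderr);
          break;
        case LA_ACT_CONSISTENT:  /* The link map is stable again.  */
          fputs ("activity: consistent\n", stderr);
          break;
        }
    }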
diff --git a/SOURCES/glibc-upstream-2.34-144.patch b/SOURCES/glibc-upstream-2.34-144.patch new file mode 100644 index 0000000..62ab51c --- /dev/null +++ b/SOURCES/glibc-upstream-2.34-144.patch @@ -0,0 +1,157 @@ +commit ec0fc2a15358dc5f7191f9994f04b1385d14377d +Author: Adhemerval Zanella +Date: Tue Jul 20 13:47:36 2021 -0300 + + elf: Add _dl_audit_objsearch + + It consolidates the code required to call la_objsearch audit + callback. + + Checked on x86_64-linux-gnu, i686-linux-gnu, and aarch64-linux-gnu. + + Reviewed-by: Florian Weimer + (cherry picked from commit c91008d3490e4e3ce29520068405f081f0d368ca) + +diff --git a/elf/dl-audit.c b/elf/dl-audit.c +index 74b87f4b39be75e1..5682427220569d90 100644 +--- a/elf/dl-audit.c ++++ b/elf/dl-audit.c +@@ -44,6 +44,28 @@ _dl_audit_activity_nsid (Lmid_t nsid, int action) + _dl_audit_activity_map (head, action); + } + ++const char * ++_dl_audit_objsearch (const char *name, struct link_map *l, unsigned int code) ++{ ++ if (l == NULL || l->l_auditing || code == 0) ++ return name; ++ ++ struct audit_ifaces *afct = GLRO(dl_audit); ++ for (unsigned int cnt = 0; cnt < GLRO(dl_naudit); ++cnt) ++ { ++ if (afct->objsearch != NULL) ++ { ++ struct auditstate *state = link_map_audit_state (l, cnt); ++ name = afct->objsearch (name, &state->cookie, code); ++ if (name == NULL) ++ return NULL; ++ } ++ afct = afct->next; ++ } ++ ++ return name; ++} ++ + void + _dl_audit_objopen (struct link_map *l, Lmid_t nsid) + { +diff --git a/elf/dl-load.c b/elf/dl-load.c +index baf0a926053deaed..eb6b658b698f5694 100644 +--- a/elf/dl-load.c ++++ b/elf/dl-load.c +@@ -1596,32 +1596,20 @@ open_verify (const char *name, int fd, + + #ifdef SHARED + /* Give the auditing libraries a chance. */ +- if (__glibc_unlikely (GLRO(dl_naudit) > 0) && whatcode != 0 +- && loader->l_auditing == 0) ++ if (__glibc_unlikely (GLRO(dl_naudit) > 0)) + { + const char *original_name = name; +- struct audit_ifaces *afct = GLRO(dl_audit); +- for (unsigned int cnt = 0; cnt < GLRO(dl_naudit); ++cnt) +- { +- if (afct->objsearch != NULL) +- { +- struct auditstate *state = link_map_audit_state (loader, cnt); +- name = afct->objsearch (name, &state->cookie, whatcode); +- if (name == NULL) +- /* Ignore the path. */ +- return -1; +- } +- +- afct = afct->next; +- } ++ name = _dl_audit_objsearch (name, loader, whatcode); ++ if (name == NULL) ++ return -1; + + if (fd != -1 && name != original_name && strcmp (name, original_name)) +- { +- /* An audit library changed what we're supposed to open, +- so FD no longer matches it. */ +- __close_nocancel (fd); +- fd = -1; +- } ++ { ++ /* An audit library changed what we're supposed to open, ++ so FD no longer matches it. */ ++ __close_nocancel (fd); ++ fd = -1; ++ } + } + #endif + +@@ -2060,36 +2048,17 @@ _dl_map_object (struct link_map *loader, const char *name, + #ifdef SHARED + /* Give the auditing libraries a chance to change the name before we + try anything. */ +- if (__glibc_unlikely (GLRO(dl_naudit) > 0) +- && (loader == NULL || loader->l_auditing == 0)) ++ if (__glibc_unlikely (GLRO(dl_naudit) > 0)) + { +- struct audit_ifaces *afct = GLRO(dl_audit); +- for (unsigned int cnt = 0; cnt < GLRO(dl_naudit); ++cnt) ++ const char *before = name; ++ name = _dl_audit_objsearch (name, loader, LA_SER_ORIG); ++ if (name == NULL) + { +- if (afct->objsearch != NULL) +- { +- const char *before = name; +- struct auditstate *state = link_map_audit_state (loader, cnt); +- name = afct->objsearch (name, &state->cookie, LA_SER_ORIG); +- if (name == NULL) +- { +- /* Do not try anything further. */ +- fd = -1; +- goto no_file; +- } +- if (before != name && strcmp (before, name) != 0) +- { +- if (__glibc_unlikely (GLRO(dl_debug_mask) & DL_DEBUG_FILES)) +- _dl_debug_printf ("audit changed filename %s -> %s\n", +- before, name); +- +- if (origname == NULL) +- origname = before; +- } +- } +- +- afct = afct->next; ++ fd = -1; ++ goto no_file; + } ++ if (before != name && strcmp (before, name) != 0) ++ origname = before; + } + #endif + +diff --git a/sysdeps/generic/ldsodefs.h b/sysdeps/generic/ldsodefs.h +index 7384abcf5e0e8e24..1f212a18d7bfc440 100644 +--- a/sysdeps/generic/ldsodefs.h ++++ b/sysdeps/generic/ldsodefs.h +@@ -1373,6 +1373,13 @@ link_map_audit_state (struct link_map *l, size_t index) + } + } + ++/* Call the la_objsearch from the audit modules from the link map L. If ++ ORIGNAME is non-NULL, it is updated with the previous name prior to calling ++ la_objsearch. */ ++const char *_dl_audit_objsearch (const char *name, struct link_map *l, ++ unsigned int code) ++ attribute_hidden; ++ + /* Call the la_activity from the audit modules from the link map L and issues + the ACTION argument. */ + void _dl_audit_activity_map (struct link_map *l, int action)
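_dl_audit_objsearch gives each audit module a chance to veto or rewrite the pathname being searched; a NULL return makes the loader skip the candidate, which is why both call sites above bail out on NULL. A module-side sketch that redirects one library (both names are made-up examples):

    #include <link.h>
    #include <stdio.h>
    #include <string.h>

    unsigned int
    la_version (unsigned int version)
    {
      return LAV_CURRENT;
    }

    char *
    la_objsearch (const char *name, uintptr_t *cookie, unsigned int flag)
    {
      /* Rewrite only the application's original request; leave later
         search stages (rpath, LD_LIBRARY_PATH, ...) alone.  */
      if (flag == LA_SER_ORIG && strcmp (name, "libfoo.so.1") == 0)
        return (char *) "/opt/foo/libfoo.so.1";
      return (char *) name;
    }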
diff --git a/SOURCES/glibc-upstream-2.34-145.patch b/SOURCES/glibc-upstream-2.34-145.patch new file mode 100644 index 0000000..f429360 --- /dev/null +++ b/SOURCES/glibc-upstream-2.34-145.patch @@ -0,0 +1,123 @@ +commit 198660741b23ec9defb19e22951d4a721de603c8 +Author: Adhemerval Zanella +Date: Tue Jul 20 14:04:51 2021 -0300 + + elf: Add _dl_audit_objclose + + It consolidates the code required to call la_objclose audit + callback. + + Checked on x86_64-linux-gnu, i686-linux-gnu, and aarch64-linux-gnu. + + Reviewed-by: Florian Weimer + (cherry picked from commit 311c9ee54ea963ff69bd3a2e6981c37e893b4c3e) + +diff --git a/elf/dl-audit.c b/elf/dl-audit.c +index 5682427220569d90..cb1c3de93cba447b 100644 +--- a/elf/dl-audit.c ++++ b/elf/dl-audit.c +@@ -85,3 +85,24 @@ _dl_audit_objopen (struct link_map *l, Lmid_t nsid) + afct = afct->next; + } + } ++ ++void ++_dl_audit_objclose (struct link_map *l) ++{ ++ if (__glibc_likely (GLRO(dl_naudit) == 0) ++ || GL(dl_ns)[l->l_ns]._ns_loaded->l_auditing) ++ return; ++ ++ struct audit_ifaces *afct = GLRO(dl_audit); ++ for (unsigned int cnt = 0; cnt < GLRO(dl_naudit); ++cnt) ++ { ++ if (afct->objclose != NULL) ++ { ++ struct auditstate *state = link_map_audit_state (l, cnt); ++ /* Return value is ignored. */ ++ afct->objclose (&state->cookie); ++ } ++ ++ afct = afct->next; ++ } ++} +diff --git a/elf/dl-close.c b/elf/dl-close.c +index 5a8cc9e7cb5186cc..985cd4e2821436af 100644 +--- a/elf/dl-close.c ++++ b/elf/dl-close.c +@@ -260,9 +260,6 @@ _dl_close_worker (struct link_map *map, bool force) + _dl_sort_maps (maps, nloaded, (nsid == LM_ID_BASE), true); + + /* Call all termination functions at once. */ +-#ifdef SHARED +- bool do_audit = GLRO(dl_naudit) > 0 && !ns->_ns_loaded->l_auditing; +-#endif + bool unload_any = false; + bool scope_mem_left = false; + unsigned int unload_global = 0; +@@ -296,22 +293,7 @@ _dl_close_worker (struct link_map *map, bool force) + + #ifdef SHARED + /* Auditing checkpoint: we remove an object. */ +- if (__glibc_unlikely (do_audit)) +- { +- struct audit_ifaces *afct = GLRO(dl_audit); +- for (unsigned int cnt = 0; cnt < GLRO(dl_naudit); ++cnt) +- { +- if (afct->objclose != NULL) +- { +- struct auditstate *state +- = link_map_audit_state (imap, cnt); +- /* Return value is ignored. */ +- (void) afct->objclose (&state->cookie); +- } +- +- afct = afct->next; +- } +- } ++ _dl_audit_objclose (imap); + #endif + + /* This object must not be used anymore. */ +diff --git a/elf/dl-fini.c b/elf/dl-fini.c +index c683884c355dfd52..b789cfb9f2ac6c85 100644 +--- a/elf/dl-fini.c ++++ b/elf/dl-fini.c +@@ -146,21 +146,7 @@ _dl_fini (void) + + #ifdef SHARED + /* Auditing checkpoint: another object closed. */ +- if (!do_audit && __builtin_expect (GLRO(dl_naudit) > 0, 0)) +- { +- struct audit_ifaces *afct = GLRO(dl_audit); +- for (unsigned int cnt = 0; cnt < GLRO(dl_naudit); ++cnt) +- { +- if (afct->objclose != NULL) +- { +- struct auditstate *state +- = link_map_audit_state (l, cnt); +- /* Return value is ignored. */ +- (void) afct->objclose (&state->cookie); +- } +- afct = afct->next; +- } +- } ++ _dl_audit_objclose (l); + #endif + } + +diff --git a/sysdeps/generic/ldsodefs.h b/sysdeps/generic/ldsodefs.h +index 1f212a18d7bfc440..982f23c0287955fe 100644 +--- a/sysdeps/generic/ldsodefs.h ++++ b/sysdeps/generic/ldsodefs.h +@@ -1394,6 +1394,10 @@ void _dl_audit_activity_nsid (Lmid_t nsid, int action) + namespace identification NSID. */ + void _dl_audit_objopen (struct link_map *l, Lmid_t nsid) + attribute_hidden; ++ ++/* Call the la_objclose from the audit modules for the link_map L.
*/ ++void _dl_audit_objclose (struct link_map *l) ++ attribute_hidden; + #endif /* SHARED */ + + #if PTHREAD_IN_LIBC && defined SHARED diff --git a/SOURCES/glibc-upstream-2.34-146.patch b/SOURCES/glibc-upstream-2.34-146.patch new file mode 100644 index 0000000..4024ad2 --- /dev/null +++ b/SOURCES/glibc-upstream-2.34-146.patch @@ -0,0 +1,334 @@ +commit b2d99731b6d27c719a30b8ffa931e91c73a6bb4b +Author: Adhemerval Zanella +Date: Tue Jul 20 15:58:35 2021 -0300 + + elf: Add _dl_audit_symbind_alt and _dl_audit_symbind + + It consolidates the code required to call la_symbind{32,64} audit + callback. + + Checked on x86_64-linux-gnu, i686-linux-gnu, and aarch64-linux-gnu. + + Reviewed-by: Florian Weimer + (cherry picked from commit cda4f265c65fb6c4ce38ca1cf0a7e527c5e77cd5) + +diff --git a/elf/Versions b/elf/Versions +index 2af210b8f771c950..164682eaeaa9a1da 100644 +--- a/elf/Versions ++++ b/elf/Versions +@@ -58,6 +58,7 @@ ld { + _dl_argv; _dl_find_dso_for_object; _dl_get_tls_static_info; + _dl_deallocate_tls; _dl_make_stack_executable; + _dl_rtld_di_serinfo; _dl_starting_up; _dl_fatal_printf; ++ _dl_audit_symbind_alt; + _rtld_global; _rtld_global_ro; + + # Only here for gdb while a better method is developed. +diff --git a/elf/dl-audit.c b/elf/dl-audit.c +index cb1c3de93cba447b..a21530f30bc5524b 100644 +--- a/elf/dl-audit.c ++++ b/elf/dl-audit.c +@@ -16,6 +16,7 @@ + License along with the GNU C Library; if not, see + . */ + ++#include + #include + + void +@@ -106,3 +107,124 @@ _dl_audit_objclose (struct link_map *l) + afct = afct->next; + } + } ++ ++void ++_dl_audit_symbind_alt (struct link_map *l, const ElfW(Sym) *ref, void **value, ++ lookup_t result) ++{ ++ if ((l->l_audit_any_plt | result->l_audit_any_plt) == 0) ++ return; ++ ++ const char *strtab = (const char *) D_PTR (result, l_info[DT_STRTAB]); ++ /* Compute index of the symbol entry in the symbol table of the DSO with ++ the definition. */ ++ unsigned int ndx = (ref - (ElfW(Sym) *) D_PTR (result, l_info[DT_SYMTAB])); ++ ++ unsigned int altvalue = 0; ++ /* Synthesize a symbol record where the st_value field is the result. */ ++ ElfW(Sym) sym = *ref; ++ sym.st_value = (ElfW(Addr)) *value; ++ ++ struct audit_ifaces *afct = GLRO(dl_audit); ++ for (unsigned int cnt = 0; cnt < GLRO(dl_naudit); ++cnt) ++ { ++ struct auditstate *match_audit = link_map_audit_state (l, cnt); ++ struct auditstate *result_audit = link_map_audit_state (result, cnt); ++ if (afct->symbind != NULL ++ && ((match_audit->bindflags & LA_FLG_BINDFROM) != 0 ++ || ((result_audit->bindflags & LA_FLG_BINDTO) ++ != 0))) ++ { ++ unsigned int flags = altvalue | LA_SYMB_DLSYM; ++ uintptr_t new_value = afct->symbind (&sym, ndx, ++ &match_audit->cookie, ++ &result_audit->cookie, ++ &flags, strtab + ref->st_name); ++ if (new_value != (uintptr_t) sym.st_value) ++ { ++ altvalue = LA_SYMB_ALTVALUE; ++ sym.st_value = new_value; ++ } ++ ++ afct = afct->next; ++ } ++ ++ *value = (void *) sym.st_value; ++ } ++} ++rtld_hidden_def (_dl_audit_symbind_alt) ++ ++void ++_dl_audit_symbind (struct link_map *l, struct reloc_result *reloc_result, ++ const ElfW(Sym) *defsym, DL_FIXUP_VALUE_TYPE *value, ++ lookup_t result) ++{ ++ reloc_result->bound = result; ++ /* Compute index of the symbol entry in the symbol table of the DSO with the ++ definition. */ ++ reloc_result->boundndx = (defsym - (ElfW(Sym) *) D_PTR (result, ++ l_info[DT_SYMTAB])); ++ ++ if ((l->l_audit_any_plt | result->l_audit_any_plt) == 0) ++ { ++ /* Set all bits since this symbol binding is not interesting. 
*/ ++ reloc_result->enterexit = (1u << DL_NNS) - 1; ++ return; ++ } ++ ++ /* Synthesize a symbol record where the st_value field is the result. */ ++ ElfW(Sym) sym = *defsym; ++ sym.st_value = DL_FIXUP_VALUE_ADDR (*value); ++ ++ /* Keep track whether there is any interest in tracing the call in the lower ++ two bits. */ ++ assert (DL_NNS * 2 <= sizeof (reloc_result->flags) * 8); ++ assert ((LA_SYMB_NOPLTENTER | LA_SYMB_NOPLTEXIT) == 3); ++ reloc_result->enterexit = LA_SYMB_NOPLTENTER | LA_SYMB_NOPLTEXIT; ++ ++ const char *strtab2 = (const void *) D_PTR (result, l_info[DT_STRTAB]); ++ ++ unsigned int flags = 0; ++ struct audit_ifaces *afct = GLRO(dl_audit); ++ for (unsigned int cnt = 0; cnt < GLRO(dl_naudit); ++cnt) ++ { ++ /* XXX Check whether both DSOs must request action or only one */ ++ struct auditstate *l_state = link_map_audit_state (l, cnt); ++ struct auditstate *result_state = link_map_audit_state (result, cnt); ++ if ((l_state->bindflags & LA_FLG_BINDFROM) != 0 ++ && (result_state->bindflags & LA_FLG_BINDTO) != 0) ++ { ++ if (afct->symbind != NULL) ++ { ++ uintptr_t new_value = afct->symbind (&sym, ++ reloc_result->boundndx, ++ &l_state->cookie, ++ &result_state->cookie, ++ &flags, ++ strtab2 + defsym->st_name); ++ if (new_value != (uintptr_t) sym.st_value) ++ { ++ flags |= LA_SYMB_ALTVALUE; ++ sym.st_value = new_value; ++ } ++ } ++ ++ /* Remember the results for every audit library and store a summary ++ in the first two bits. */ ++ reloc_result->enterexit &= flags & (LA_SYMB_NOPLTENTER ++ | LA_SYMB_NOPLTEXIT); ++ reloc_result->enterexit |= ((flags & (LA_SYMB_NOPLTENTER ++ | LA_SYMB_NOPLTEXIT)) ++ << ((cnt + 1) * 2)); ++ } ++ else ++ /* If the bind flags say this auditor is not interested, set the bits ++ manually. */ ++ reloc_result->enterexit |= ((LA_SYMB_NOPLTENTER | LA_SYMB_NOPLTEXIT) ++ << ((cnt + 1) * 2)); ++ afct = afct->next; ++ } ++ ++ reloc_result->flags = flags; ++ *value = DL_FIXUP_ADDR_VALUE (sym.st_value); ++} +diff --git a/elf/dl-runtime.c b/elf/dl-runtime.c +index 61c260ddb81b586c..c4413c9165cec8cb 100644 +--- a/elf/dl-runtime.c ++++ b/elf/dl-runtime.c +@@ -297,84 +297,7 @@ _dl_profile_fixup ( + auditing libraries the possibility to change the value and + tell us whether further auditing is wanted. */ + if (defsym != NULL && GLRO(dl_naudit) > 0) +- { +- reloc_result->bound = result; +- /* Compute index of the symbol entry in the symbol table of +- the DSO with the definition. */ +- reloc_result->boundndx = (defsym +- - (ElfW(Sym) *) D_PTR (result, +- l_info[DT_SYMTAB])); +- +- /* Determine whether any of the two participating DSOs is +- interested in auditing. */ +- if ((l->l_audit_any_plt | result->l_audit_any_plt) != 0) +- { +- unsigned int flags = 0; +- struct audit_ifaces *afct = GLRO(dl_audit); +- /* Synthesize a symbol record where the st_value field is +- the result. */ +- ElfW(Sym) sym = *defsym; +- sym.st_value = DL_FIXUP_VALUE_ADDR (value); +- +- /* Keep track whether there is any interest in tracing +- the call in the lower two bits. 
*/ +- assert (DL_NNS * 2 <= sizeof (reloc_result->flags) * 8); +- assert ((LA_SYMB_NOPLTENTER | LA_SYMB_NOPLTEXIT) == 3); +- reloc_result->enterexit = LA_SYMB_NOPLTENTER | LA_SYMB_NOPLTEXIT; +- +- const char *strtab2 = (const void *) D_PTR (result, +- l_info[DT_STRTAB]); +- +- for (unsigned int cnt = 0; cnt < GLRO(dl_naudit); ++cnt) +- { +- /* XXX Check whether both DSOs must request action or +- only one */ +- struct auditstate *l_state = link_map_audit_state (l, cnt); +- struct auditstate *result_state +- = link_map_audit_state (result, cnt); +- if ((l_state->bindflags & LA_FLG_BINDFROM) != 0 +- && (result_state->bindflags & LA_FLG_BINDTO) != 0) +- { +- if (afct->symbind != NULL) +- { +- uintptr_t new_value +- = afct->symbind (&sym, reloc_result->boundndx, +- &l_state->cookie, +- &result_state->cookie, +- &flags, +- strtab2 + defsym->st_name); +- if (new_value != (uintptr_t) sym.st_value) +- { +- flags |= LA_SYMB_ALTVALUE; +- sym.st_value = new_value; +- } +- } +- +- /* Remember the results for every audit library and +- store a summary in the first two bits. */ +- reloc_result->enterexit +- &= flags & (LA_SYMB_NOPLTENTER | LA_SYMB_NOPLTEXIT); +- reloc_result->enterexit +- |= ((flags & (LA_SYMB_NOPLTENTER | LA_SYMB_NOPLTEXIT)) +- << ((cnt + 1) * 2)); +- } +- else +- /* If the bind flags say this auditor is not interested, +- set the bits manually. */ +- reloc_result->enterexit +- |= ((LA_SYMB_NOPLTENTER | LA_SYMB_NOPLTEXIT) +- << ((cnt + 1) * 2)); +- +- afct = afct->next; +- } +- +- reloc_result->flags = flags; +- value = DL_FIXUP_ADDR_VALUE (sym.st_value); +- } +- else +- /* Set all bits since this symbol binding is not interesting. */ +- reloc_result->enterexit = (1u << DL_NNS) - 1; +- } ++ _dl_audit_symbind (l, reloc_result, defsym, &value, result); + #endif + + /* Store the result for later runs. */ +diff --git a/elf/dl-sym-post.h b/elf/dl-sym-post.h +index d68c2d2b1cd43c9b..a11095d3e8c3c937 100644 +--- a/elf/dl-sym-post.h ++++ b/elf/dl-sym-post.h +@@ -52,54 +52,9 @@ _dl_sym_post (lookup_t result, const ElfW(Sym) *ref, void *value, + tell us whether further auditing is wanted. */ + if (__glibc_unlikely (GLRO(dl_naudit) > 0)) + { +- const char *strtab = (const char *) D_PTR (result, +- l_info[DT_STRTAB]); +- /* Compute index of the symbol entry in the symbol table of +- the DSO with the definition. */ +- unsigned int ndx = (ref - (ElfW(Sym) *) D_PTR (result, +- l_info[DT_SYMTAB])); +- + if (match == NULL) + match = _dl_sym_find_caller_link_map (caller); +- +- if ((match->l_audit_any_plt | result->l_audit_any_plt) != 0) +- { +- unsigned int altvalue = 0; +- struct audit_ifaces *afct = GLRO(dl_audit); +- /* Synthesize a symbol record where the st_value field is +- the result. 
*/ +- ElfW(Sym) sym = *ref; +- sym.st_value = (ElfW(Addr)) value; +- +- for (unsigned int cnt = 0; cnt < GLRO(dl_naudit); ++cnt) +- { +- struct auditstate *match_audit +- = link_map_audit_state (match, cnt); +- struct auditstate *result_audit +- = link_map_audit_state (result, cnt); +- if (afct->symbind != NULL +- && ((match_audit->bindflags & LA_FLG_BINDFROM) != 0 +- || ((result_audit->bindflags & LA_FLG_BINDTO) +- != 0))) +- { +- unsigned int flags = altvalue | LA_SYMB_DLSYM; +- uintptr_t new_value +- = afct->symbind (&sym, ndx, +- &match_audit->cookie, +- &result_audit->cookie, +- &flags, strtab + ref->st_name); +- if (new_value != (uintptr_t) sym.st_value) +- { +- altvalue = LA_SYMB_ALTVALUE; +- sym.st_value = new_value; +- } +- } +- +- afct = afct->next; +- } +- +- value = (void *) sym.st_value; +- } ++ _dl_audit_symbind_alt (match, ref, &value, result); + } + #endif + return value; +diff --git a/sysdeps/generic/ldsodefs.h b/sysdeps/generic/ldsodefs.h +index 982f23c0287955fe..61f1dfb3f79a613a 100644 +--- a/sysdeps/generic/ldsodefs.h ++++ b/sysdeps/generic/ldsodefs.h +@@ -1398,6 +1398,16 @@ void _dl_audit_objopen (struct link_map *l, Lmid_t nsid) + /* Call the la_objclose from the audit modules for the link_map L. */ + void _dl_audit_objclose (struct link_map *l) + attribute_hidden; ++ ++/* Call the la_symbind{32,64} from the audit modules for the link_map L. */ ++void _dl_audit_symbind (struct link_map *l, struct reloc_result *reloc_result, ++ const ElfW(Sym) *defsym, DL_FIXUP_VALUE_TYPE *value, ++ lookup_t result) ++ attribute_hidden; ++/* Same as _dl_audit_symbind, but also sets LA_SYMB_DLSYM flag. */ ++void _dl_audit_symbind_alt (struct link_map *l, const ElfW(Sym) *ref, ++ void **value, lookup_t result); ++rtld_hidden_proto (_dl_audit_symbind_alt) + #endif /* SHARED */ + + #if PTHREAD_IN_LIBC && defined SHARED diff --git a/SOURCES/glibc-upstream-2.34-147.patch b/SOURCES/glibc-upstream-2.34-147.patch new file mode 100644 index 0000000..5c93ad3 --- /dev/null +++ b/SOURCES/glibc-upstream-2.34-147.patch @@ -0,0 +1,107 @@ +commit 31473c273be14270f8eef68e35c03fd2305eb2c3 +Author: Adhemerval Zanella +Date: Thu Jul 22 17:10:57 2021 -0300 + + elf: Add _dl_audit_preinit + + It consolidates the code required to call la_preinit audit + callback. + + Checked on x86_64-linux-gnu, i686-linux-gnu, and aarch64-linux-gnu. + + Reviewed-by: Florian Weimer + (cherry picked from commit 0b98a8748759e88b58927882a8714109abe0a2d6) + +diff --git a/csu/libc-start.c b/csu/libc-start.c +index 0350b006fdcc22d2..d01e57ea59ceb880 100644 +--- a/csu/libc-start.c ++++ b/csu/libc-start.c +@@ -377,32 +377,15 @@ LIBC_START_MAIN (int (*main) (int, char **, char ** MAIN_AUXVEC_DECL), + /* This is a current program. Use the dynamic segment to find + constructors. */ + call_init (argc, argv, __environ); +-#else /* !SHARED */ +- call_init (argc, argv, __environ); +-#endif /* SHARED */ + +-#ifdef SHARED + /* Auditing checkpoint: we have a new object. 
*/ +- if (__glibc_unlikely (GLRO(dl_naudit) > 0)) +- { +- struct audit_ifaces *afct = GLRO(dl_audit); +- struct link_map *head = GL(dl_ns)[LM_ID_BASE]._ns_loaded; +- for (unsigned int cnt = 0; cnt < GLRO(dl_naudit); ++cnt) +- { +- if (afct->preinit != NULL) +- afct->preinit (&link_map_audit_state (head, cnt)->cookie); +- +- afct = afct->next; +- } +- } +-#endif ++ _dl_audit_preinit (GL(dl_ns)[LM_ID_BASE]._ns_loaded); + +-#ifdef SHARED + if (__glibc_unlikely (GLRO(dl_debug_mask) & DL_DEBUG_IMPCALLS)) + GLRO(dl_debug_printf) ("\ntransferring control: %s\n\n", argv[0]); +-#endif ++#else /* !SHARED */ ++ call_init (argc, argv, __environ); + +-#ifndef SHARED + _dl_debug_initialize (0, LM_ID_BASE); + #endif + +diff --git a/elf/Versions b/elf/Versions +index 164682eaeaa9a1da..bb6697647b397772 100644 +--- a/elf/Versions ++++ b/elf/Versions +@@ -58,7 +58,7 @@ ld { + _dl_argv; _dl_find_dso_for_object; _dl_get_tls_static_info; + _dl_deallocate_tls; _dl_make_stack_executable; + _dl_rtld_di_serinfo; _dl_starting_up; _dl_fatal_printf; +- _dl_audit_symbind_alt; ++ _dl_audit_symbind_alt; _dl_audit_preinit; + _rtld_global; _rtld_global_ro; + + # Only here for gdb while a better method is developed. +diff --git a/elf/dl-audit.c b/elf/dl-audit.c +index a21530f30bc5524b..0b6fac8e48877c93 100644 +--- a/elf/dl-audit.c ++++ b/elf/dl-audit.c +@@ -108,6 +108,21 @@ _dl_audit_objclose (struct link_map *l) + } + } + ++void ++_dl_audit_preinit (struct link_map *l) ++{ ++ if (__glibc_likely (GLRO(dl_naudit) == 0)) ++ return; ++ ++ struct audit_ifaces *afct = GLRO(dl_audit); ++ for (unsigned int cnt = 0; cnt < GLRO(dl_naudit); ++cnt) ++ { ++ if (afct->preinit != NULL) ++ afct->preinit (&link_map_audit_state (l, cnt)->cookie); ++ afct = afct->next; ++ } ++} ++ + void + _dl_audit_symbind_alt (struct link_map *l, const ElfW(Sym) *ref, void **value, + lookup_t result) +diff --git a/sysdeps/generic/ldsodefs.h b/sysdeps/generic/ldsodefs.h +index 61f1dfb3f79a613a..91193a036fc5c6ef 100644 +--- a/sysdeps/generic/ldsodefs.h ++++ b/sysdeps/generic/ldsodefs.h +@@ -1399,6 +1399,9 @@ void _dl_audit_objopen (struct link_map *l, Lmid_t nsid) + void _dl_audit_objclose (struct link_map *l) + attribute_hidden; + ++/* Call the la_preinit from the audit modules for the link_map L. */ ++void _dl_audit_preinit (struct link_map *l); ++ + /* Call the la_symbind{32,64} from the audit modules for the link_map L. */ + void _dl_audit_symbind (struct link_map *l, struct reloc_result *reloc_result, + const ElfW(Sym) *defsym, DL_FIXUP_VALUE_TYPE *value, diff --git a/SOURCES/glibc-upstream-2.34-148.patch b/SOURCES/glibc-upstream-2.34-148.patch new file mode 100644 index 0000000..11ffb9f --- /dev/null +++ b/SOURCES/glibc-upstream-2.34-148.patch @@ -0,0 +1,206 @@ +commit fd9c4e8a1b72fa1372855051217f9480680d882a +Author: Adhemerval Zanella +Date: Thu Jul 22 17:45:33 2021 -0300 + + elf: Add _dl_audit_pltenter + + It consolidates the code required to call la_pltenter audit + callback. + + Checked on x86_64-linux-gnu, i686-linux-gnu, and aarch64-linux-gnu. + + Reviewed-by: Florian Weimer + (cherry picked from commit eff687e8462b0eaf65992a6031b54a4b1cd16796) + +diff --git a/elf/dl-audit.c b/elf/dl-audit.c +index 0b6fac8e48877c93..15250c67e8ac1658 100644 +--- a/elf/dl-audit.c ++++ b/elf/dl-audit.c +@@ -17,7 +17,9 @@ + . 
*/ + + #include ++#include + #include ++#include + + void + _dl_audit_activity_map (struct link_map *l, int action) +@@ -243,3 +245,78 @@ _dl_audit_symbind (struct link_map *l, struct reloc_result *reloc_result, + reloc_result->flags = flags; + *value = DL_FIXUP_ADDR_VALUE (sym.st_value); + } ++ ++void ++_dl_audit_pltenter (struct link_map *l, struct reloc_result *reloc_result, ++ DL_FIXUP_VALUE_TYPE *value, void *regs, long int *framesize) ++{ ++ /* Don't do anything if no auditor wants to intercept this call. */ ++ if (GLRO(dl_naudit) == 0 ++ || (reloc_result->enterexit & LA_SYMB_NOPLTENTER)) ++ return; ++ ++ /* Sanity check: DL_FIXUP_VALUE_CODE_ADDR (value) should have been ++ initialized earlier in this function or in another thread. */ ++ assert (DL_FIXUP_VALUE_CODE_ADDR (*value) != 0); ++ ElfW(Sym) *defsym = ((ElfW(Sym) *) D_PTR (reloc_result->bound, ++ l_info[DT_SYMTAB]) ++ + reloc_result->boundndx); ++ ++ /* Set up the sym parameter. */ ++ ElfW(Sym) sym = *defsym; ++ sym.st_value = DL_FIXUP_VALUE_ADDR (*value); ++ ++ /* Get the symbol name. */ ++ const char *strtab = (const void *) D_PTR (reloc_result->bound, ++ l_info[DT_STRTAB]); ++ const char *symname = strtab + sym.st_name; ++ ++ /* Keep track of overwritten addresses. */ ++ unsigned int flags = reloc_result->flags; ++ ++ struct audit_ifaces *afct = GLRO(dl_audit); ++ for (unsigned int cnt = 0; cnt < GLRO(dl_naudit); ++cnt) ++ { ++ if (afct->ARCH_LA_PLTENTER != NULL ++ && (reloc_result->enterexit ++ & (LA_SYMB_NOPLTENTER << (2 * (cnt + 1)))) == 0) ++ { ++ long int new_framesize = -1; ++ struct auditstate *l_state = link_map_audit_state (l, cnt); ++ struct auditstate *bound_state ++ = link_map_audit_state (reloc_result->bound, cnt); ++ uintptr_t new_value ++ = afct->ARCH_LA_PLTENTER (&sym, reloc_result->boundndx, ++ &l_state->cookie, &bound_state->cookie, ++ regs, &flags, symname, &new_framesize); ++ if (new_value != (uintptr_t) sym.st_value) ++ { ++ flags |= LA_SYMB_ALTVALUE; ++ sym.st_value = new_value; ++ } ++ ++ /* Remember the results for every audit library and store a summary ++ in the first two bits. */ ++ reloc_result->enterexit |= ((flags & (LA_SYMB_NOPLTENTER ++ | LA_SYMB_NOPLTEXIT)) ++ << (2 * (cnt + 1))); ++ ++ if ((reloc_result->enterexit & (LA_SYMB_NOPLTEXIT ++ << (2 * (cnt + 1)))) ++ == 0 && new_framesize != -1 && *framesize != -2) ++ { ++ /* If this is the first call providing information, use it. */ ++ if (*framesize == -1) ++ *framesize = new_framesize; ++ /* If two pltenter calls provide conflicting information, use ++ the larger value. */ ++ else if (new_framesize != *framesize) ++ *framesize = MAX (new_framesize, *framesize); ++ } ++ } ++ ++ afct = afct->next; ++ } ++ ++ *value = DL_FIXUP_ADDR_VALUE (sym.st_value); ++} +diff --git a/elf/dl-runtime.c b/elf/dl-runtime.c +index c4413c9165cec8cb..dfedeaf2dd1c7253 100644 +--- a/elf/dl-runtime.c ++++ b/elf/dl-runtime.c +@@ -320,78 +320,7 @@ _dl_profile_fixup ( + #ifdef SHARED + /* Auditing checkpoint: report the PLT entering and allow the + auditors to change the value. */ +- if (GLRO(dl_naudit) > 0 +- /* Don't do anything if no auditor wants to intercept this call. */ +- && (reloc_result->enterexit & LA_SYMB_NOPLTENTER) == 0) +- { +- /* Sanity check: DL_FIXUP_VALUE_CODE_ADDR (value) should have been +- initialized earlier in this function or in another thread. 
*/ +- assert (DL_FIXUP_VALUE_CODE_ADDR (value) != 0); +- ElfW(Sym) *defsym = ((ElfW(Sym) *) D_PTR (reloc_result->bound, +- l_info[DT_SYMTAB]) +- + reloc_result->boundndx); +- +- /* Set up the sym parameter. */ +- ElfW(Sym) sym = *defsym; +- sym.st_value = DL_FIXUP_VALUE_ADDR (value); +- +- /* Get the symbol name. */ +- const char *strtab = (const void *) D_PTR (reloc_result->bound, +- l_info[DT_STRTAB]); +- const char *symname = strtab + sym.st_name; +- +- /* Keep track of overwritten addresses. */ +- unsigned int flags = reloc_result->flags; +- +- struct audit_ifaces *afct = GLRO(dl_audit); +- for (unsigned int cnt = 0; cnt < GLRO(dl_naudit); ++cnt) +- { +- if (afct->ARCH_LA_PLTENTER != NULL +- && (reloc_result->enterexit +- & (LA_SYMB_NOPLTENTER << (2 * (cnt + 1)))) == 0) +- { +- long int new_framesize = -1; +- struct auditstate *l_state = link_map_audit_state (l, cnt); +- struct auditstate *bound_state +- = link_map_audit_state (reloc_result->bound, cnt); +- uintptr_t new_value +- = afct->ARCH_LA_PLTENTER (&sym, reloc_result->boundndx, +- &l_state->cookie, +- &bound_state->cookie, +- regs, &flags, symname, +- &new_framesize); +- if (new_value != (uintptr_t) sym.st_value) +- { +- flags |= LA_SYMB_ALTVALUE; +- sym.st_value = new_value; +- } +- +- /* Remember the results for every audit library and +- store a summary in the first two bits. */ +- reloc_result->enterexit +- |= ((flags & (LA_SYMB_NOPLTENTER | LA_SYMB_NOPLTEXIT)) +- << (2 * (cnt + 1))); +- +- if ((reloc_result->enterexit & (LA_SYMB_NOPLTEXIT +- << (2 * (cnt + 1)))) +- == 0 && new_framesize != -1 && framesize != -2) +- { +- /* If this is the first call providing information, +- use it. */ +- if (framesize == -1) +- framesize = new_framesize; +- /* If two pltenter calls provide conflicting information, +- use the larger value. */ +- else if (new_framesize != framesize) +- framesize = MAX (new_framesize, framesize); +- } +- } +- +- afct = afct->next; +- } +- +- value = DL_FIXUP_ADDR_VALUE (sym.st_value); +- } ++ _dl_audit_pltenter (l, reloc_result, &value, regs, &framesize); + #endif + + /* Store the frame size information. */ +diff --git a/sysdeps/generic/ldsodefs.h b/sysdeps/generic/ldsodefs.h +index 91193a036fc5c6ef..ea187dd266f14e06 100644 +--- a/sysdeps/generic/ldsodefs.h ++++ b/sysdeps/generic/ldsodefs.h +@@ -1411,6 +1411,10 @@ void _dl_audit_symbind (struct link_map *l, struct reloc_result *reloc_result, + void _dl_audit_symbind_alt (struct link_map *l, const ElfW(Sym) *ref, + void **value, lookup_t result); + rtld_hidden_proto (_dl_audit_symbind_alt) ++void _dl_audit_pltenter (struct link_map *l, struct reloc_result *reloc_result, ++ DL_FIXUP_VALUE_TYPE *value, void *regs, ++ long int *framesize) ++ attribute_hidden; + #endif /* SHARED */ + + #if PTHREAD_IN_LIBC && defined SHARED diff --git a/SOURCES/glibc-upstream-2.34-149.patch b/SOURCES/glibc-upstream-2.34-149.patch new file mode 100644 index 0000000..927d0e7 --- /dev/null +++ b/SOURCES/glibc-upstream-2.34-149.patch @@ -0,0 +1,715 @@ +commit a8e211daea6bdb505b10319ed3492e7d871c1e75 +Author: Adhemerval Zanella +Date: Thu Jul 22 18:02:42 2021 -0300 + + elf: Add _dl_audit_pltexit + + It consolidates the code required to call la_pltexit audit + callback. + + Checked on x86_64-linux-gnu, i686-linux-gnu, and aarch64-linux-gnu. 
+ + Reviewed-by: Florian Weimer + (cherry picked from commit 8c0664e2b861fd3789602cc0b0b1922b0e20cb3a) + + Resolved conflicts: + sysdeps/hppa/dl-runtime.c + +diff --git a/elf/dl-audit.c b/elf/dl-audit.c +index 15250c67e8ac1658..152712b12fed6de2 100644 +--- a/elf/dl-audit.c ++++ b/elf/dl-audit.c +@@ -20,6 +20,8 @@ + #include + #include + #include ++#include ++#include + + void + _dl_audit_activity_map (struct link_map *l, int action) +@@ -320,3 +322,48 @@ _dl_audit_pltenter (struct link_map *l, struct reloc_result *reloc_result, + + *value = DL_FIXUP_ADDR_VALUE (sym.st_value); + } ++ ++void ++DL_ARCH_FIXUP_ATTRIBUTE ++_dl_audit_pltexit (struct link_map *l, ElfW(Word) reloc_arg, ++ const void *inregs, void *outregs) ++{ ++ const uintptr_t pltgot = (uintptr_t) D_PTR (l, l_info[DT_PLTGOT]); ++ ++ /* This is the address in the array where we store the result of previous ++ relocations. */ ++ // XXX Maybe the bound information must be stored on the stack since ++ // XXX with bind_not a new value could have been stored in the meantime. ++ struct reloc_result *reloc_result = ++ &l->l_reloc_result[reloc_index (pltgot, reloc_arg, sizeof (PLTREL))]; ++ ElfW(Sym) *defsym = ((ElfW(Sym) *) D_PTR (reloc_result->bound, ++ l_info[DT_SYMTAB]) ++ + reloc_result->boundndx); ++ ++ /* Set up the sym parameter. */ ++ ElfW(Sym) sym = *defsym; ++ sym.st_value = DL_FIXUP_VALUE_ADDR (reloc_result->addr); ++ ++ /* Get the symbol name. */ ++ const char *strtab = (const void *) D_PTR (reloc_result->bound, ++ l_info[DT_STRTAB]); ++ const char *symname = strtab + sym.st_name; ++ ++ struct audit_ifaces *afct = GLRO(dl_audit); ++ for (unsigned int cnt = 0; cnt < GLRO(dl_naudit); ++cnt) ++ { ++ if (afct->ARCH_LA_PLTEXIT != NULL ++ && (reloc_result->enterexit ++ & (LA_SYMB_NOPLTEXIT >> (2 * cnt))) == 0) ++ { ++ struct auditstate *l_state = link_map_audit_state (l, cnt); ++ struct auditstate *bound_state ++ = link_map_audit_state (reloc_result->bound, cnt); ++ afct->ARCH_LA_PLTEXIT (&sym, reloc_result->boundndx, ++ &l_state->cookie, &bound_state->cookie, ++ inregs, outregs, symname); ++ } ++ ++ afct = afct->next; ++ } ++} +diff --git a/elf/dl-runtime.c b/elf/dl-runtime.c +index dfedeaf2dd1c7253..e42f6e8b8dfca08e 100644 +--- a/elf/dl-runtime.c ++++ b/elf/dl-runtime.c +@@ -16,8 +16,6 @@ + License along with the GNU C Library; if not, see + . */ + +-#define IN_DL_RUNTIME 1 /* This can be tested in dl-machine.h. */ +- + #include + #include + #include +@@ -31,19 +29,6 @@ + #include + + +-#if (!ELF_MACHINE_NO_RELA && !defined ELF_MACHINE_PLT_REL) \ +- || ELF_MACHINE_NO_REL +-# define PLTREL ElfW(Rela) +-#else +-# define PLTREL ElfW(Rel) +-#endif +- +-/* The fixup functions might have need special attributes. If none +- are provided define the macro as empty. */ +-#ifndef ARCH_FIXUP_ATTRIBUTE +-# define ARCH_FIXUP_ATTRIBUTE +-#endif +- + /* This function is called through a special trampoline from the PLT the + first time each PLT entry is called. We must perform the relocation + specified in the PLT of the given shared object, and return the resolved +@@ -52,7 +37,7 @@ + function. 
*/ + + DL_FIXUP_VALUE_TYPE +-attribute_hidden __attribute ((noinline)) ARCH_FIXUP_ATTRIBUTE ++attribute_hidden __attribute ((noinline)) DL_ARCH_FIXUP_ATTRIBUTE + _dl_fixup ( + # ifdef ELF_MACHINE_RUNTIME_FIXUP_ARGS + ELF_MACHINE_RUNTIME_FIXUP_ARGS, +@@ -148,7 +133,8 @@ _dl_fixup ( + + #ifndef PROF + DL_FIXUP_VALUE_TYPE +-__attribute ((noinline)) ARCH_FIXUP_ATTRIBUTE ++__attribute ((noinline)) ++DL_ARCH_FIXUP_ATTRIBUTE + _dl_profile_fixup ( + #ifdef ELF_MACHINE_RUNTIME_FIXUP_ARGS + ELF_MACHINE_RUNTIME_FIXUP_ARGS, +@@ -332,52 +318,3 @@ _dl_profile_fixup ( + } + + #endif /* PROF */ +- +- +-#include +-void +-ARCH_FIXUP_ATTRIBUTE +-_dl_call_pltexit (struct link_map *l, ElfW(Word) reloc_arg, +- const void *inregs, void *outregs) +-{ +-#ifdef SHARED +- const uintptr_t pltgot = (uintptr_t) D_PTR (l, l_info[DT_PLTGOT]); +- +- /* This is the address in the array where we store the result of previous +- relocations. */ +- // XXX Maybe the bound information must be stored on the stack since +- // XXX with bind_not a new value could have been stored in the meantime. +- struct reloc_result *reloc_result = +- &l->l_reloc_result[reloc_index (pltgot, reloc_arg, sizeof (PLTREL))]; +- ElfW(Sym) *defsym = ((ElfW(Sym) *) D_PTR (reloc_result->bound, +- l_info[DT_SYMTAB]) +- + reloc_result->boundndx); +- +- /* Set up the sym parameter. */ +- ElfW(Sym) sym = *defsym; +- sym.st_value = DL_FIXUP_VALUE_ADDR (reloc_result->addr); +- +- /* Get the symbol name. */ +- const char *strtab = (const void *) D_PTR (reloc_result->bound, +- l_info[DT_STRTAB]); +- const char *symname = strtab + sym.st_name; +- +- struct audit_ifaces *afct = GLRO(dl_audit); +- for (unsigned int cnt = 0; cnt < GLRO(dl_naudit); ++cnt) +- { +- if (afct->ARCH_LA_PLTEXIT != NULL +- && (reloc_result->enterexit +- & (LA_SYMB_NOPLTEXIT >> (2 * cnt))) == 0) +- { +- struct auditstate *l_state = link_map_audit_state (l, cnt); +- struct auditstate *bound_state +- = link_map_audit_state (reloc_result->bound, cnt); +- afct->ARCH_LA_PLTEXIT (&sym, reloc_result->boundndx, +- &l_state->cookie, &bound_state->cookie, +- inregs, outregs, symname); +- } +- +- afct = afct->next; +- } +-#endif +-} +diff --git a/elf/dl-support.c b/elf/dl-support.c +index c5ee5d33aa7e1d65..f29dc965f4d10648 100644 +--- a/elf/dl-support.c ++++ b/elf/dl-support.c +@@ -437,3 +437,11 @@ _dl_get_dl_main_map (void) + return &_dl_main_map; + } + #endif ++ ++/* This is used by _dl_runtime_profile, not used on static code. */ ++void ++DL_ARCH_FIXUP_ATTRIBUTE ++_dl_audit_pltexit (struct link_map *l, ElfW(Word) reloc_arg, ++ const void *inregs, void *outregs) ++{ ++} +diff --git a/sysdeps/aarch64/dl-trampoline.S b/sysdeps/aarch64/dl-trampoline.S +index a7e9267c1c6a4863..9b352b1d0f7d62e7 100644 +--- a/sysdeps/aarch64/dl-trampoline.S ++++ b/sysdeps/aarch64/dl-trampoline.S +@@ -293,7 +293,7 @@ _dl_runtime_profile: + ldp x0, x1, [x29, #OFFSET_SAVED_CALL_X0] + add x2, x29, #OFFSET_RG + add x3, x29, #OFFSET_RV +- bl _dl_call_pltexit ++ bl _dl_audit_pltexit + + ldp x0, x1, [x29, #OFFSET_RV + DL_OFFSET_RV_X0] + ldp d0, d1, [x29, #OFFSET_RV + DL_OFFSET_RV_D0 + 16*0] +diff --git a/sysdeps/alpha/dl-trampoline.S b/sysdeps/alpha/dl-trampoline.S +index 9dfce5b0839dc122..55380d48ad8536ee 100644 +--- a/sysdeps/alpha/dl-trampoline.S ++++ b/sysdeps/alpha/dl-trampoline.S +@@ -187,7 +187,7 @@ _dl_runtime_profile_new: + jsr $26, ($27), 0 + ldgp $29, 0($26) + +- /* Set up for call to _dl_call_pltexit. */ ++ /* Set up for call to _dl_audit_pltexit. 
*/ + ldq $16, 16*8($15) + ldq $17, 17*8($15) + stq $0, 16*8($15) +@@ -196,7 +196,7 @@ _dl_runtime_profile_new: + lda $19, 16*8($15) + stt $f0, 18*8($15) + stt $f1, 19*8($15) +- bsr $26, _dl_call_pltexit !samegp ++ bsr $26, _dl_audit_pltexit !samegp + + mov $15, $30 + cfi_def_cfa_register (30) +@@ -518,7 +518,7 @@ _dl_runtime_profile_old: + jsr $26, ($27), 0 + ldgp $29, 0($26) + +- /* Set up for call to _dl_call_pltexit. */ ++ /* Set up for call to _dl_audit_pltexit. */ + ldq $16, 48*8($15) + ldq $17, 49*8($15) + stq $0, 46*8($15) +@@ -527,7 +527,7 @@ _dl_runtime_profile_old: + lda $19, 46*8($15) + stt $f0, 48*8($15) + stt $f1, 49*8($15) +- bsr $26, _dl_call_pltexit !samegp ++ bsr $26, _dl_audit_pltexit !samegp + + mov $15, $30 + cfi_def_cfa_register (30) +diff --git a/sysdeps/arm/dl-machine-rel.h b/sysdeps/arm/dl-machine-rel.h +index bec114706cd027a4..a9ee25a6b1d381ac 100644 +--- a/sysdeps/arm/dl-machine-rel.h ++++ b/sysdeps/arm/dl-machine-rel.h +@@ -28,4 +28,6 @@ + Prelinked libraries may use Elf32_Rela though. */ + #define ELF_MACHINE_PLT_REL 1 + ++#define PLTREL ElfW(Rel) ++ + #endif +diff --git a/sysdeps/arm/dl-trampoline.S b/sysdeps/arm/dl-trampoline.S +index 70105308ca7df934..a2d322706db77981 100644 +--- a/sysdeps/arm/dl-trampoline.S ++++ b/sysdeps/arm/dl-trampoline.S +@@ -194,7 +194,7 @@ _dl_runtime_profile: + ldmia ip, {r0,r1} + add r2, r7, #72 + add r3, r7, #0 +- bl _dl_call_pltexit ++ bl _dl_audit_pltexit + + @ Return to caller. + ldmia r7, {r0-r3} +diff --git a/sysdeps/generic/dl-fixup-attribute.h b/sysdeps/generic/dl-fixup-attribute.h +new file mode 100644 +index 0000000000000000..aa92169b709b3fea +--- /dev/null ++++ b/sysdeps/generic/dl-fixup-attribute.h +@@ -0,0 +1,24 @@ ++/* ABI specifics for lazy resolution functions. ++ Copyright (C) 2021 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#ifndef _DL_FIXUP_ATTRIBUTE_H ++#define _DL_FIXUP_ATTRIBUTE_H ++ ++#define DL_ARCH_FIXUP_ATTRIBUTE ++ ++#endif +diff --git a/sysdeps/generic/dl-machine-rel.h b/sysdeps/generic/dl-machine-rel.h +index 9167a1dffc715704..9d5b7bb749e69e63 100644 +--- a/sysdeps/generic/dl-machine-rel.h ++++ b/sysdeps/generic/dl-machine-rel.h +@@ -23,5 +23,7 @@ + #define ELF_MACHINE_NO_REL 1 + /* Defined if the architecture supports Elf{32,64}_Rela relocations. */ + #define ELF_MACHINE_NO_RELA 0 ++/* Used to calculate the index of link_map l_reloc_result. 
*/ ++#define PLTREL ElfW(Rela) + + #endif +diff --git a/sysdeps/generic/ldsodefs.h b/sysdeps/generic/ldsodefs.h +index ea187dd266f14e06..686f0a7b9709eb10 100644 +--- a/sysdeps/generic/ldsodefs.h ++++ b/sysdeps/generic/ldsodefs.h +@@ -35,6 +35,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -1415,6 +1416,11 @@ void _dl_audit_pltenter (struct link_map *l, struct reloc_result *reloc_result, + DL_FIXUP_VALUE_TYPE *value, void *regs, + long int *framesize) + attribute_hidden; ++void DL_ARCH_FIXUP_ATTRIBUTE _dl_audit_pltexit (struct link_map *l, ++ ElfW(Word) reloc_arg, ++ const void *inregs, ++ void *outregs) ++ attribute_hidden; + #endif /* SHARED */ + + #if PTHREAD_IN_LIBC && defined SHARED +diff --git a/sysdeps/hppa/dl-runtime.c b/sysdeps/hppa/dl-runtime.c +index a71b5b2013abf723..8699171930f51489 100644 +--- a/sysdeps/hppa/dl-runtime.c ++++ b/sysdeps/hppa/dl-runtime.c +@@ -25,7 +25,7 @@ + return that to the caller. The caller will continue on to call + _dl_fixup with the relocation offset. */ + +-ElfW(Word) __attribute ((noinline)) ARCH_FIXUP_ATTRIBUTE ++ElfW(Word) __attribute ((noinline)) DL_ARCH_FIXUP_ATTRIBUTE + _dl_fix_reloc_arg (struct fdesc *fptr, struct link_map *l) + { + Elf32_Addr l_addr, iplt, jmprel, end_jmprel, r_type; +diff --git a/sysdeps/hppa/dl-trampoline.S b/sysdeps/hppa/dl-trampoline.S +index cb18ea7eabba41ed..c54879bae0148012 100644 +--- a/sysdeps/hppa/dl-trampoline.S ++++ b/sysdeps/hppa/dl-trampoline.S +@@ -300,7 +300,7 @@ L(cont): + ldw -4(%sp),%r1 + copy %r1, %sp + +- /* Arguments to _dl_call_pltexit */ ++ /* Arguments to _dl_audit_pltexit */ + ldw -116(%sp), %r26 /* (1) got[1] == struct link_map */ + ldw -120(%sp), %r25 /* (2) reloc offsets */ + ldo -56(%sp), %r24 /* (3) *La_hppa_regs */ +@@ -312,8 +312,8 @@ L(cont): + ldo -128(%sp), %r1 + fstd %fr4,0(%r1) + +- /* Call _dl_call_pltexit */ +- bl _dl_call_pltexit,%rp ++ /* Call _dl_audit_pltexit */ ++ bl _dl_audit_pltexit,%rp + nop + + /* Restore *La_hppa_retval */ +diff --git a/sysdeps/i386/dl-fixup-attribute.h b/sysdeps/i386/dl-fixup-attribute.h +new file mode 100644 +index 0000000000000000..c10e9936f4db7254 +--- /dev/null ++++ b/sysdeps/i386/dl-fixup-attribute.h +@@ -0,0 +1,30 @@ ++/* ABI specifics for lazy resolution functions. i386 version. ++ Copyright (C) 2021 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#ifndef _DL_FIXUP_ATTRIBUTE_H ++#define _DL_FIXUP_ATTRIBUTE_H ++ ++/* We cannot use this scheme for profiling because the _mcount call destroys ++ the passed register information. 
*/ ++#ifndef PROF ++# define DL_ARCH_FIXUP_ATTRIBUTE __attribute__ ((regparm (3), stdcall, unused)) ++#else ++# define DL_ARCH_FIXUP_ATTRIBUTE ++#endif ++ ++#endif +diff --git a/sysdeps/i386/dl-machine-rel.h b/sysdeps/i386/dl-machine-rel.h +index 7ac46f78a69fbf98..bb3480d45415d761 100644 +--- a/sysdeps/i386/dl-machine-rel.h ++++ b/sysdeps/i386/dl-machine-rel.h +@@ -28,4 +28,6 @@ + Prelinked libraries may use Elf32_Rela though. */ + #define ELF_MACHINE_PLT_REL 1 + ++#define PLTREL ElfW(Rel) ++ + #endif +diff --git a/sysdeps/i386/dl-machine.h b/sysdeps/i386/dl-machine.h +index c55c9a3d64bed1f2..5483e903d81e85c6 100644 +--- a/sysdeps/i386/dl-machine.h ++++ b/sysdeps/i386/dl-machine.h +@@ -122,29 +122,6 @@ elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[], + return lazy; + } + +-#ifdef IN_DL_RUNTIME +- +-# ifndef PROF +-/* We add a declaration of this function here so that in dl-runtime.c +- the ELF_MACHINE_RUNTIME_TRAMPOLINE macro really can pass the parameters +- in registers. +- +- We cannot use this scheme for profiling because the _mcount call +- destroys the passed register information. */ +-#define ARCH_FIXUP_ATTRIBUTE __attribute__ ((regparm (3), stdcall, unused)) +- +-extern ElfW(Addr) _dl_fixup (struct link_map *l, +- ElfW(Word) reloc_offset) +- ARCH_FIXUP_ATTRIBUTE; +-extern ElfW(Addr) _dl_profile_fixup (struct link_map *l, +- ElfW(Word) reloc_offset, +- ElfW(Addr) retaddr, void *regs, +- long int *framesizep) +- ARCH_FIXUP_ATTRIBUTE; +-# endif +- +-#endif +- + /* Mask identifying addresses reserved for the user program, + where the dynamic linker should not map anything. */ + #define ELF_MACHINE_USER_ADDRESS_MASK 0xf0000000UL +diff --git a/sysdeps/i386/dl-trampoline.S b/sysdeps/i386/dl-trampoline.S +index b5ec0326df94f0fd..3a33051c52da9cde 100644 +--- a/sysdeps/i386/dl-trampoline.S ++++ b/sysdeps/i386/dl-trampoline.S +@@ -265,7 +265,7 @@ _dl_runtime_profile: + movl (LRV_SIZE + 4 + LR_SIZE)(%esp), %eax + # PLT1 + movl (LRV_SIZE + 4 + LR_SIZE + 4)(%esp), %edx +- call _dl_call_pltexit ++ call _dl_audit_pltexit + movl LRV_EAX_OFFSET(%esp), %eax + movl LRV_EDX_OFFSET(%esp), %edx + fldt LRV_ST1_OFFSET(%esp) +diff --git a/sysdeps/ia64/dl-trampoline.S b/sysdeps/ia64/dl-trampoline.S +index 3053405a3a21d62e..11e86932c75d5b6b 100644 +--- a/sysdeps/ia64/dl-trampoline.S ++++ b/sysdeps/ia64/dl-trampoline.S +@@ -133,7 +133,7 @@ END(_dl_runtime_resolve) + + + /* The fourth argument to _dl_profile_fixup and the third one to +- _dl_call_pltexit are a pointer to La_ia64_regs: ++ _dl_audit_pltexit are a pointer to La_ia64_regs: + + 8byte r8 + 8byte r9 +@@ -159,7 +159,7 @@ END(_dl_runtime_resolve) + 8byte sp + + The fifth argument to _dl_profile_fixup is a pointer to long int. 
+- The fourth argument to _dl_call_pltexit is a pointer to ++ The fourth argument to _dl_audit_pltexit is a pointer to + La_ia64_retval: + + 8byte r8 +@@ -261,7 +261,7 @@ ENTRY(_dl_runtime_profile) + } + { .mii + mov r18 = ar.unat /* save it in La_ia64_regs */ +- mov loc7 = out3 /* save it for _dl_call_pltexit */ ++ mov loc7 = out3 /* save it for _dl_audit_pltexit */ + mov loc5 = r11 /* preserve language specific register */ + } + { .mmi +@@ -272,7 +272,7 @@ ENTRY(_dl_runtime_profile) + } + { .mii + mov ar.unat = r17 /* restore it for function call */ +- mov loc8 = r16 /* save it for _dl_call_pltexit */ ++ mov loc8 = r16 /* save it for _dl_audit_pltexit */ + nop.i 0x0 + } + { .mmi +@@ -291,7 +291,7 @@ ENTRY(_dl_runtime_profile) + { .mmi + stf.spill [r2] = f14, 32 + stf.spill [r3] = f15, 24 +- mov loc9 = out1 /* save it for _dl_call_pltexit */ ++ mov loc9 = out1 /* save it for _dl_audit_pltexit */ + ;; + } + { .mmb +@@ -426,7 +426,7 @@ ENTRY(_dl_runtime_profile) + br.call.sptk.many b0 = b6 + } + { .mii +- /* Prepare stack for _dl_call_pltexit. Loc10 has the original ++ /* Prepare stack for _dl_audit_pltexit. Loc10 has the original + stack pointer. */ + adds r12 = -PLTEXIT_FRAME_SIZE, loc10 + adds r2 = -(PLTEXIT_FRAME_SIZE - 16), loc10 +@@ -461,14 +461,14 @@ ENTRY(_dl_runtime_profile) + { .mmi + stf.spill [r2] = f12, 32 + stf.spill [r3] = f13, 32 +- /* We need to restore gp for _dl_call_pltexit. */ ++ /* We need to restore gp for _dl_audit_pltexit. */ + mov gp = loc11 + ;; + } + { .mmb + stf.spill [r2] = f14 + stf.spill [r3] = f15 +- br.call.sptk.many b0 = _dl_call_pltexit ++ br.call.sptk.many b0 = _dl_audit_pltexit + } + { .mmi + /* Load all the non-floating and floating return values. Skip +diff --git a/sysdeps/m68k/dl-trampoline.S b/sysdeps/m68k/dl-trampoline.S +index a51a5f7f573c6330..72bde664c31c4256 100644 +--- a/sysdeps/m68k/dl-trampoline.S ++++ b/sysdeps/m68k/dl-trampoline.S +@@ -202,7 +202,7 @@ _dl_runtime_profile: + cfi_adjust_cfa_offset (4) + move.l (32+FPSPACE)(%sp), -(%sp) + cfi_adjust_cfa_offset (4) +- jbsr _dl_call_pltexit ++ jbsr _dl_audit_pltexit + lea 16(%sp), %sp + cfi_adjust_cfa_offset (-16) + move.l (%sp)+, %d0 +diff --git a/sysdeps/mips/dl-machine-rel.h b/sysdeps/mips/dl-machine-rel.h +index ed396180412bc723..3d0dfec01f6b193e 100644 +--- a/sysdeps/mips/dl-machine-rel.h ++++ b/sysdeps/mips/dl-machine-rel.h +@@ -22,5 +22,6 @@ + #define ELF_MACHINE_PLT_REL 1 + #define ELF_MACHINE_NO_REL 0 + #define ELF_MACHINE_NO_RELA 0 ++#define PLTREL ElfW(Rel) + + #endif +diff --git a/sysdeps/powerpc/powerpc64/dl-trampoline.S b/sysdeps/powerpc/powerpc64/dl-trampoline.S +index 61bd8571fcc93caa..97f0105ce780514e 100644 +--- a/sysdeps/powerpc/powerpc64/dl-trampoline.S ++++ b/sysdeps/powerpc/powerpc64/dl-trampoline.S +@@ -197,7 +197,7 @@ END(_dl_runtime_resolve) + #ifndef PROF + ENTRY (_dl_profile_resolve, 4) + /* Spill r30, r31 to preserve the link_map* and reloc_addr, in case we +- need to call _dl_call_pltexit. */ ++ need to call _dl_audit_pltexit. */ + std r31,-8(r1) + std r30,-16(r1) + /* We need to save the registers used to pass parameters, ie. 
r3 thru +@@ -452,7 +452,7 @@ L(restoreFXR2): + L(callpltexit): + addi r5,r1,INT_PARMS + addi r6,r1,INT_RTN +- bl JUMPTARGET(_dl_call_pltexit) ++ bl JUMPTARGET(_dl_audit_pltexit) + #ifndef SHARED + nop + #endif +diff --git a/sysdeps/s390/s390-32/dl-trampoline.h b/sysdeps/s390/s390-32/dl-trampoline.h +index c224a2b92832af9b..9e4cd1055fe6ab20 100644 +--- a/sysdeps/s390/s390-32/dl-trampoline.h ++++ b/sysdeps/s390/s390-32/dl-trampoline.h +@@ -282,7 +282,7 @@ _dl_runtime_profile: + basr %r1,0 + 5: l %r14,7f-5b(%r1) + la %r5,CFA_OFF+RETVAL_OFF(%r12) # struct La_s390_32_retval * +- bas %r14,0(%r14,%r1) # call _dl_call_pltexit ++ bas %r14,0(%r14,%r1) # call _dl_audit_pltexit + + lr %r15,%r12 # remove stack frame + # undef FRAME_SIZE +@@ -301,7 +301,7 @@ _dl_runtime_profile: + br %r14 + + 6: .long _dl_profile_fixup - 0b +-7: .long _dl_call_pltexit - 5b ++7: .long _dl_audit_pltexit - 5b + cfi_endproc + .size _dl_runtime_profile, .-_dl_runtime_profile + # undef SIZEOF_STRUCT_LA_S390_32_REGS +diff --git a/sysdeps/s390/s390-64/dl-trampoline.h b/sysdeps/s390/s390-64/dl-trampoline.h +index ae741a3bad5ec77e..6e5bad40459ec765 100644 +--- a/sysdeps/s390/s390-64/dl-trampoline.h ++++ b/sysdeps/s390/s390-64/dl-trampoline.h +@@ -284,7 +284,7 @@ _dl_runtime_profile: + lmg %r2,%r4,CFA_OFF+PLT1_OFF(%r12) # r2, r3: args saved by PLT + # r4: struct La_s390_64_regs * + la %r5,CFA_OFF+RETVAL_OFF(%r12) # struct La_s390_64_retval * +- brasl %r14,_dl_call_pltexit ++ brasl %r14,_dl_audit_pltexit + + lgr %r15,%r12 # remove stack frame + # undef FRAME_SIZE +diff --git a/sysdeps/sh/dl-trampoline.S b/sysdeps/sh/dl-trampoline.S +index 824ac84ba1830ce5..f9038cd10ed5286f 100644 +--- a/sysdeps/sh/dl-trampoline.S ++++ b/sysdeps/sh/dl-trampoline.S +@@ -423,8 +423,8 @@ _dl_runtime_profile: + .align 2 + #ifdef SHARED + 7: .long _GLOBAL_OFFSET_TABLE_ +-8: .long _dl_call_pltexit@GOTOFF ++8: .long _dl_audit_pltexit@GOTOFF + #else +-8: .long _dl_call_pltexit ++8: .long _dl_audit_pltexit + #endif + .size _dl_runtime_profile, .-_dl_runtime_profile +diff --git a/sysdeps/sparc/sparc32/dl-trampoline.S b/sysdeps/sparc/sparc32/dl-trampoline.S +index 426f90c99a7ed369..2f64809731c865a2 100644 +--- a/sysdeps/sparc/sparc32/dl-trampoline.S ++++ b/sysdeps/sparc/sparc32/dl-trampoline.S +@@ -127,7 +127,7 @@ _dl_profile_invoke: + mov %l5, %o0 + mov %l6, %o1 + add %sp, (11 * 8), %o2 +- call _dl_call_pltexit ++ call _dl_audit_pltexit + add %sp, ( 9 * 8), %o3 + + ldd [%sp + ( 9 * 8)], %i0 +diff --git a/sysdeps/sparc/sparc64/dl-trampoline.S b/sysdeps/sparc/sparc64/dl-trampoline.S +index 8d59fa67209cd8ab..86605e37acd929fd 100644 +--- a/sysdeps/sparc/sparc64/dl-trampoline.S ++++ b/sysdeps/sparc/sparc64/dl-trampoline.S +@@ -196,7 +196,7 @@ _dl_profile_invoke: + mov %l5, %o0 + mov %l6, %o1 + add %sp, STACK_BIAS + (24 * 8), %o2 +- call _dl_call_pltexit ++ call _dl_audit_pltexit + add %sp, STACK_BIAS + (16 * 8), %o3 + + ldx [%sp + STACK_BIAS + (16 * 8)], %i0 +diff --git a/sysdeps/x86_64/dl-runtime.h b/sysdeps/x86_64/dl-runtime.h +index 9c8d3977eee27069..19ba33ef30970c20 100644 +--- a/sysdeps/x86_64/dl-runtime.h ++++ b/sysdeps/x86_64/dl-runtime.h +@@ -18,7 +18,7 @@ + 02111-1307 USA. */ + + /* The ABI calls for the PLT stubs to pass the index of the relocation +- and not its offset. In _dl_profile_fixup and _dl_call_pltexit we ++ and not its offset. In _dl_profile_fixup and _dl_audit_pltexit we + also use the index. Therefore it is wasteful to compute the offset + in the trampoline just to reverse the operation immediately + afterwards. 
*/ +diff --git a/sysdeps/x86_64/dl-trampoline.h b/sysdeps/x86_64/dl-trampoline.h +index b9a12970cd6206ee..b5de7efff778559e 100644 +--- a/sysdeps/x86_64/dl-trampoline.h ++++ b/sysdeps/x86_64/dl-trampoline.h +@@ -388,7 +388,7 @@ _dl_runtime_profile: + jns 3f + + /* There's nothing in the frame size, so there +- will be no call to the _dl_call_pltexit. */ ++ will be no call to the _dl_audit_pltexit. */ + + /* Get back registers content. */ + movq LR_RCX_OFFSET(%rsp), %rcx +@@ -436,7 +436,7 @@ _dl_runtime_profile: + mov 24(%rbx), %RSP_LP # Drop the copied stack content + + /* Now we have to prepare the La_x86_64_retval structure for the +- _dl_call_pltexit. The La_x86_64_regs is being pointed by rsp now, ++ _dl_audit_pltexit. The La_x86_64_regs is being pointed by rsp now, + so we just need to allocate the sizeof(La_x86_64_retval) space on + the stack, since the alignment has already been taken care of. */ + # ifdef RESTORE_AVX +@@ -491,7 +491,7 @@ _dl_runtime_profile: + movq 24(%rbx), %rdx # La_x86_64_regs argument to %rdx. + movq 40(%rbx), %rsi # Copy args pushed by PLT in register. + movq 32(%rbx), %rdi # %rdi: link_map, %rsi: reloc_index +- call _dl_call_pltexit ++ call _dl_audit_pltexit + + /* Restore return registers. */ + movq LRV_RAX_OFFSET(%rsp), %rax diff --git a/SOURCES/glibc-upstream-2.34-150.patch b/SOURCES/glibc-upstream-2.34-150.patch new file mode 100644 index 0000000..40829fd --- /dev/null +++ b/SOURCES/glibc-upstream-2.34-150.patch @@ -0,0 +1,454 @@ +commit 29496b3103ff13aa3c1d8b62552a98f39da0fe59 +Author: Adhemerval Zanella +Date: Wed Jun 30 10:24:09 2021 -0300 + + elf: Avoid unnecessary slowdown from profiling with audit (BZ#15533) + + The rtld-audit interfaces introduces a slowdown due to enabling + profiling instrumentation (as if LD_AUDIT implied LD_PROFILE). + However, instrumenting is only necessary if one of audit libraries + provides PLT callbacks (la_pltenter or la_pltexit symbols). Otherwise, + the slowdown can be avoided. + + The following patch adjusts the logic that enables profiling to iterate + over all audit modules and check if any of those provides a PLT hook. + To keep la_symbind to work even without PLT callbacks, _dl_fixup now + calls the audit callback if the modules implements it. + + Co-authored-by: Alexander Monakov + + Checked on x86_64-linux-gnu, i686-linux-gnu, and aarch64-linux-gnu. 
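    For reference, the kind of audit module this change optimizes for
    provides la_symbind but neither PLT hook.  A minimal sketch, closely
    modeled on the tst-auditmod19b test added below (64-bit shown; a
    32-bit module would define la_symbind32 instead):

    #define _GNU_SOURCE
    #include <link.h>
    #include <stdio.h>

    unsigned int
    la_version (unsigned int version)
    {
      return LAV_CURRENT;
    }

    unsigned int
    la_objopen (struct link_map *map, Lmid_t lmid, uintptr_t *cookie)
    {
      /* Ask for la_symbind notifications for bindings to and from
         this object.  */
      return LA_FLG_BINDTO | LA_FLG_BINDFROM;
    }

    uintptr_t
    la_symbind64 (Elf64_Sym *sym, unsigned int ndx, uintptr_t *refcook,
                  uintptr_t *defcook, unsigned int *flags,
                  const char *symname)
    {
      fprintf (stderr, "la_symbind: %s\n", symname);
      return sym->st_value;  /* Leave the binding unchanged.  */
    }

    Built with, e.g., gcc -shared -fPIC and loaded through LD_AUDIT,
    such a module previously paid the full _dl_profile_fixup cost on
    every lazy PLT resolution; after this change it is serviced by the
    ordinary _dl_fixup path, which now invokes la_symbind directly.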
+ + Reviewed-by: Florian Weimer + (cherry picked from commit 063f9ba220f434c7f30dd65c4cff17c0c458a7cf) + + Resolved conflicts: + NEWS + elf/Makefile + +diff --git a/elf/Makefile b/elf/Makefile +index 85165c0591412a45..eab9d46b6165e6be 100644 +--- a/elf/Makefile ++++ b/elf/Makefile +@@ -365,6 +365,7 @@ tests += \ + tst-audit16 \ + tst-audit17 \ + tst-audit18 \ ++ tst-audit19b \ + tst-auditmany \ + tst-auxobj \ + tst-auxobj-dlopen \ +@@ -454,6 +455,7 @@ tests-internal += \ + neededtest2 \ + neededtest3 \ + neededtest4 \ ++ tst-audit19a \ + tst-create_format1 \ + tst-dl-hwcaps_split \ + tst-dlmopen2 \ +@@ -626,6 +628,7 @@ modules-names = \ + tst-audit12mod3 \ + tst-audit13mod1 \ + tst-audit18mod \ ++ tst-audit19bmod \ + tst-auditlogmod-1 \ + tst-auditlogmod-2 \ + tst-auditlogmod-3 \ +@@ -644,6 +647,8 @@ modules-names = \ + tst-auditmod11 \ + tst-auditmod12 \ + tst-auditmod18 \ ++ tst-auditmod19a \ ++ tst-auditmod19b \ + tst-auxvalmod \ + tst-big-note-lib \ + tst-deep1mod1 \ +@@ -2007,6 +2012,13 @@ $(objpfx)tst-audit18.out: $(objpfx)tst-auditmod18.so \ + $(objpfx)tst-audit18mod.so + tst-audit18-ARGS = -- $(host-test-program-cmd) + ++$(objpfx)tst-audit19a.out: $(objpfx)tst-auditmod19a.so ++tst-audit19a-ENV = LD_AUDIT=$(objpfx)tst-auditmod19a.so ++ ++$(objpfx)tst-audit19b.out: $(objpfx)tst-auditmod19b.so ++$(objpfx)tst-audit19b: $(objpfx)tst-audit19bmod.so ++tst-audit19b-ARGS = -- $(host-test-program-cmd) ++ + # tst-sonamemove links against an older implementation of the library. + LDFLAGS-tst-sonamemove-linkmod1.so = \ + -Wl,--version-script=tst-sonamemove-linkmod1.map \ +diff --git a/elf/dl-reloc.c b/elf/dl-reloc.c +index 3447de7f3536cd70..5b69321bda1f2b27 100644 +--- a/elf/dl-reloc.c ++++ b/elf/dl-reloc.c +@@ -205,12 +205,28 @@ _dl_relocate_object (struct link_map *l, struct r_scope_elem *scope[], + int skip_ifunc = reloc_mode & __RTLD_NOIFUNC; + + #ifdef SHARED ++ bool consider_symbind = false; + /* If we are auditing, install the same handlers we need for profiling. */ + if ((reloc_mode & __RTLD_AUDIT) == 0) +- consider_profiling |= GLRO(dl_audit) != NULL; ++ { ++ struct audit_ifaces *afct = GLRO(dl_audit); ++ for (unsigned int cnt = 0; cnt < GLRO(dl_naudit); ++cnt) ++ { ++ /* Profiling is needed only if PLT hooks are provided. */ ++ if (afct->ARCH_LA_PLTENTER != NULL ++ || afct->ARCH_LA_PLTEXIT != NULL) ++ consider_profiling = 1; ++ if (afct->symbind != NULL) ++ consider_symbind = true; ++ ++ afct = afct->next; ++ } ++ } + #elif defined PROF + /* Never use dynamic linker profiling for gprof profiling code. */ + # define consider_profiling 0 ++#else ++# define consider_symbind 0 + #endif + + if (l->l_relocated) +@@ -272,7 +288,7 @@ _dl_relocate_object (struct link_map *l, struct r_scope_elem *scope[], + ELF_DYNAMIC_RELOCATE (l, scope, lazy, consider_profiling, skip_ifunc); + + #ifndef PROF +- if (__glibc_unlikely (consider_profiling) ++ if ((consider_profiling || consider_symbind) + && l->l_info[DT_PLTRELSZ] != NULL) + { + /* Allocate the array which will contain the already found +diff --git a/elf/dl-runtime.c b/elf/dl-runtime.c +index e42f6e8b8dfca08e..77a5cccdcbcb9293 100644 +--- a/elf/dl-runtime.c ++++ b/elf/dl-runtime.c +@@ -124,6 +124,37 @@ _dl_fixup ( + && __builtin_expect (ELFW(ST_TYPE) (sym->st_info) == STT_GNU_IFUNC, 0)) + value = elf_ifunc_invoke (DL_FIXUP_VALUE_ADDR (value)); + ++#ifdef SHARED ++ /* Auditing checkpoint: we have a new binding. Provide the auditing ++ libraries the possibility to change the value and tell us whether further ++ auditing is wanted. 
++ The l_reloc_result is only allocated if there is an audit module which ++ provides a la_symbind. */ ++ if (l->l_reloc_result != NULL) ++ { ++ /* This is the address in the array where we store the result of previous ++ relocations. */ ++ struct reloc_result *reloc_result ++ = &l->l_reloc_result[reloc_index (pltgot, reloc_arg, sizeof (PLTREL))]; ++ unsigned int init = atomic_load_acquire (&reloc_result->init); ++ if (init == 0) ++ { ++ _dl_audit_symbind (l, reloc_result, sym, &value, result); ++ ++ /* Store the result for later runs. */ ++ if (__glibc_likely (! GLRO(dl_bind_not))) ++ { ++ reloc_result->addr = value; ++ /* Guarantee all previous writes complete before init is ++ updated. See CONCURRENCY NOTES below. */ ++ atomic_store_release (&reloc_result->init, 1); ++ } ++ } ++ else ++ value = reloc_result->addr; ++ } ++#endif ++ + /* Finally, fix up the plt itself. */ + if (__glibc_unlikely (GLRO(dl_bind_not))) + return value; +diff --git a/elf/rtld.c b/elf/rtld.c +index b6bb46ca97b7972f..f632a767d7a269ef 100644 +--- a/elf/rtld.c ++++ b/elf/rtld.c +@@ -1016,13 +1016,7 @@ ERROR: audit interface '%s' requires version %d (maximum supported version %d); + "la_objsearch\0" + "la_objopen\0" + "la_preinit\0" +-#if __ELF_NATIVE_CLASS == 32 +- "la_symbind32\0" +-#elif __ELF_NATIVE_CLASS == 64 +- "la_symbind64\0" +-#else +-# error "__ELF_NATIVE_CLASS must be defined" +-#endif ++ LA_SYMBIND "\0" + #define STRING(s) __STRING (s) + "la_" STRING (ARCH_LA_PLTENTER) "\0" + "la_" STRING (ARCH_LA_PLTEXIT) "\0" +diff --git a/elf/tst-audit19a.c b/elf/tst-audit19a.c +new file mode 100644 +index 0000000000000000..035cde9351c2711b +--- /dev/null ++++ b/elf/tst-audit19a.c +@@ -0,0 +1,38 @@ ++/* Check if DT_AUDIT a module without la_plt{enter,exit} symbols does not incur ++ in profiling (BZ#15533). ++ Copyright (C) 2021 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++#include ++#include ++ ++static int ++do_test (void) ++{ ++ void *h = xdlopen ("tst-auditmod19a.so", RTLD_NOW); ++ ++ struct link_map *lmap; ++ TEST_VERIFY_EXIT (dlinfo (h, RTLD_DI_LINKMAP, &lmap) == 0); ++ ++ /* The internal array is only allocated if profiling is enabled. */ ++ TEST_VERIFY (lmap->l_reloc_result == NULL); ++ ++ return 0; ++} ++ ++#include +diff --git a/elf/tst-audit19b.c b/elf/tst-audit19b.c +new file mode 100644 +index 0000000000000000..da015734f24e0d79 +--- /dev/null ++++ b/elf/tst-audit19b.c +@@ -0,0 +1,94 @@ ++/* Check if DT_AUDIT a module with la_plt{enter,exit} call la_symbind ++ for lazy resolution. ++ Copyright (C) 2021 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. 
++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++static int restart; ++#define CMDLINE_OPTIONS \ ++ { "restart", no_argument, &restart, 1 }, ++ ++int tst_audit18bmod1_func (void); ++ ++static int ++handle_restart (void) ++{ ++ TEST_COMPARE (tst_audit18bmod1_func (), 10); ++ return 0; ++} ++ ++static inline bool ++startswith (const char *str, const char *pre) ++{ ++ size_t lenpre = strlen (pre); ++ size_t lenstr = strlen (str); ++ return lenstr < lenpre ? false : memcmp (pre, str, lenpre) == 0; ++} ++ ++static int ++do_test (int argc, char *argv[]) ++{ ++ /* We must have either: ++ - One our fource parameters left if called initially: ++ + path to ld.so optional ++ + "--library-path" optional ++ + the library path optional ++ + the application name */ ++ ++ if (restart) ++ return handle_restart (); ++ ++ char *spargv[9]; ++ int i = 0; ++ for (; i < argc - 1; i++) ++ spargv[i] = argv[i + 1]; ++ spargv[i++] = (char *) "--direct"; ++ spargv[i++] = (char *) "--restart"; ++ spargv[i] = NULL; ++ ++ setenv ("LD_AUDIT", "tst-auditmod18b.so", 0); ++ struct support_capture_subprocess result ++ = support_capture_subprogram (spargv[0], spargv); ++ support_capture_subprocess_check (&result, "tst-audit18b", 0, sc_allow_stderr); ++ ++ bool find_symbind = false; ++ ++ FILE *out = fmemopen (result.err.buffer, result.err.length, "r"); ++ TEST_VERIFY (out != NULL); ++ char *buffer = NULL; ++ size_t buffer_length = 0; ++ while (xgetline (&buffer, &buffer_length, out)) ++ if (startswith (buffer, "la_symbind: tst_audit18bmod1_func") == 0) ++ find_symbind = true; ++ ++ TEST_COMPARE (find_symbind, true); ++ ++ free (buffer); ++ xfclose (out); ++ ++ return 0; ++} ++ ++#define TEST_FUNCTION_ARGV do_test ++#include +diff --git a/elf/tst-audit19bmod.c b/elf/tst-audit19bmod.c +new file mode 100644 +index 0000000000000000..9ffdcd8f3ffbc38e +--- /dev/null ++++ b/elf/tst-audit19bmod.c +@@ -0,0 +1,23 @@ ++/* Extra module for tst-audit18b. ++ Copyright (C) 2021 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . 
*/ ++ ++int ++tst_audit18bmod1_func (void) ++{ ++ return 10; ++} +diff --git a/elf/tst-auditmod19a.c b/elf/tst-auditmod19a.c +new file mode 100644 +index 0000000000000000..f58204099457743d +--- /dev/null ++++ b/elf/tst-auditmod19a.c +@@ -0,0 +1,25 @@ ++/* Audit module for tst-audit18a. ++ Copyright (C) 2021 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++ ++unsigned int ++la_version (unsigned int version) ++{ ++ return LAV_CURRENT; ++} +diff --git a/elf/tst-auditmod19b.c b/elf/tst-auditmod19b.c +new file mode 100644 +index 0000000000000000..e2248b2a75946746 +--- /dev/null ++++ b/elf/tst-auditmod19b.c +@@ -0,0 +1,46 @@ ++/* Audit module for tst-audit18b. ++ Copyright (C) 2021 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . 
*/ ++ ++#include ++#include ++#include ++ ++unsigned int ++la_version (unsigned int version) ++{ ++ return LAV_CURRENT; ++} ++ ++unsigned int ++la_objopen (struct link_map *map, Lmid_t lmid, uintptr_t *cookie) ++{ ++ return LA_FLG_BINDTO | LA_FLG_BINDFROM; ++} ++ ++uintptr_t ++#if __ELF_NATIVE_CLASS == 32 ++la_symbind32 (Elf32_Sym *sym, unsigned int ndx, uintptr_t *refcook, ++ uintptr_t *defcook, unsigned int *flags, const char *symname) ++#else ++la_symbind64 (Elf64_Sym *sym, unsigned int ndx, uintptr_t *refcook, ++ uintptr_t *defcook, unsigned int *flags, const char *symname) ++#endif ++{ ++ fprintf (stderr, "la_symbind: %s\n", symname); ++ return sym->st_value; ++} +diff --git a/include/link.h b/include/link.h +index 4dcf01d8aea90bc2..b3f160c278222b3c 100644 +--- a/include/link.h ++++ b/include/link.h +@@ -363,8 +363,10 @@ struct auditstate + + #if __ELF_NATIVE_CLASS == 32 + # define symbind symbind32 ++# define LA_SYMBIND "la_symbind32" + #elif __ELF_NATIVE_CLASS == 64 + # define symbind symbind64 ++# define LA_SYMBIND "la_symbind64" + #else + # error "__ELF_NATIVE_CLASS must be defined" + #endif diff --git a/SOURCES/glibc-upstream-2.34-151.patch b/SOURCES/glibc-upstream-2.34-151.patch new file mode 100644 index 0000000..21e804b --- /dev/null +++ b/SOURCES/glibc-upstream-2.34-151.patch @@ -0,0 +1,294 @@ +commit 02c6a3d35316d360ae08623f617b1873d2f6159a +Author: Adhemerval Zanella +Date: Wed Jun 30 15:51:31 2021 -0300 + + elf: Add audit tests for modules with TLSDESC + + Checked on x86_64-linux-gnu, i686-linux-gnu, and aarch64-linux-gnu. + + Reviewed-by: Florian Weimer + (cherry picked from commit d1b38173c9255b1a4ae00018ad9b35404a7c74d0) + +diff --git a/elf/Makefile b/elf/Makefile +index eab9d46b6165e6be..29f545d2272bf6e2 100644 +--- a/elf/Makefile ++++ b/elf/Makefile +@@ -807,6 +807,22 @@ modules-names += tst-gnu2-tls1mod + $(objpfx)tst-gnu2-tls1: $(objpfx)tst-gnu2-tls1mod.so + tst-gnu2-tls1mod.so-no-z-defs = yes + CFLAGS-tst-gnu2-tls1mod.c += -mtls-dialect=gnu2 ++ ++tests += tst-audit-tlsdesc tst-audit-tlsdesc-dlopen ++modules-names += tst-audit-tlsdesc-mod1 tst-audit-tlsdesc-mod2 tst-auditmod-tlsdesc ++$(objpfx)tst-audit-tlsdesc: $(objpfx)tst-audit-tlsdesc-mod1.so \ ++ $(objpfx)tst-audit-tlsdesc-mod2.so \ ++ $(shared-thread-library) ++CFLAGS-tst-audit-tlsdesc-mod1.c += -mtls-dialect=gnu2 ++CFLAGS-tst-audit-tlsdesc-mod2.c += -mtls-dialect=gnu2 ++$(objpfx)tst-audit-tlsdesc-dlopen: $(shared-thread-library) ++$(objpfx)tst-audit-tlsdesc-dlopen.out: $(objpfx)tst-audit-tlsdesc-mod1.so \ ++ $(objpfx)tst-audit-tlsdesc-mod2.so ++$(objpfx)tst-audit-tlsdesc-mod1.so: $(objpfx)tst-audit-tlsdesc-mod2.so ++$(objpfx)tst-audit-tlsdesc.out: $(objpfx)tst-auditmod-tlsdesc.so ++tst-audit-tlsdesc-ENV = LD_AUDIT=$(objpfx)tst-auditmod-tlsdesc.so ++$(objpfx)tst-audit-tlsdesc-dlopen.out: $(objpfx)tst-auditmod-tlsdesc.so ++tst-audit-tlsdesc-dlopen-ENV = LD_AUDIT=$(objpfx)tst-auditmod-tlsdesc.so + endif + ifeq (yes,$(have-protected-data)) + modules-names += tst-protected1moda tst-protected1modb +diff --git a/elf/tst-audit-tlsdesc-dlopen.c b/elf/tst-audit-tlsdesc-dlopen.c +new file mode 100644 +index 0000000000000000..9c16bb087aca1b77 +--- /dev/null ++++ b/elf/tst-audit-tlsdesc-dlopen.c +@@ -0,0 +1,67 @@ ++/* DT_AUDIT with modules with TLSDESC. ++ Copyright (C) 2021 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. 
++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++#include ++#include ++ ++static void * ++thr_func (void *mod) ++{ ++ int* (*get_global1)(void) = xdlsym (mod, "get_global1"); ++ int* (*get_global2)(void) = xdlsym (mod, "get_global2"); ++ void (*set_global2)(int) = xdlsym (mod, "set_global2"); ++ int* (*get_local1)(void) = xdlsym (mod, "get_local1"); ++ int* (*get_local2)(void) = xdlsym (mod, "get_local2"); ++ ++ int *global1 = get_global1 (); ++ TEST_COMPARE (*global1, 0); ++ ++*global1; ++ ++ int *global2 = get_global2 (); ++ TEST_COMPARE (*global2, 0); ++ ++*global2; ++ TEST_COMPARE (*global2, 1); ++ ++ set_global2 (10); ++ TEST_COMPARE (*global2, 10); ++ ++ int *local1 = get_local1 (); ++ TEST_COMPARE (*local1, 0); ++ ++*local1; ++ ++ int *local2 = get_local2 (); ++ TEST_COMPARE (*local2, 0); ++ ++*local2; ++ ++ return 0; ++} ++ ++static int ++do_test (void) ++{ ++ void *mod = xdlopen ("tst-audit-tlsdesc-mod1.so", RTLD_LAZY); ++ ++ pthread_t thr = xpthread_create (NULL, thr_func, mod); ++ void *r = xpthread_join (thr); ++ TEST_VERIFY (r == NULL); ++ ++ return 0; ++} ++ ++#include +diff --git a/elf/tst-audit-tlsdesc-mod1.c b/elf/tst-audit-tlsdesc-mod1.c +new file mode 100644 +index 0000000000000000..61c7dd99a2fb5e28 +--- /dev/null ++++ b/elf/tst-audit-tlsdesc-mod1.c +@@ -0,0 +1,41 @@ ++/* DT_AUDIT with modules with TLSDESC. ++ Copyright (C) 2021 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++__thread int global1; ++ ++int * ++get_global1 (void) ++{ ++ return &global1; ++} ++ ++static __thread int local1; ++ ++void * ++get_local1 (void) ++{ ++ return &local1; ++} ++ ++extern __thread int global2; ++ ++void ++set_global2 (int v) ++{ ++ global2 = v; ++} +diff --git a/elf/tst-audit-tlsdesc-mod2.c b/elf/tst-audit-tlsdesc-mod2.c +new file mode 100644 +index 0000000000000000..28aef635f688ee03 +--- /dev/null ++++ b/elf/tst-audit-tlsdesc-mod2.c +@@ -0,0 +1,33 @@ ++/* DT_AUDIT with modules with TLSDESC. ++ Copyright (C) 2021 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. 
++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++__thread int global2; ++ ++int * ++get_global2 (void) ++{ ++ return &global2; ++} ++ ++static __thread int local2; ++ ++void * ++get_local2 (void) ++{ ++ return &local2; ++} +diff --git a/elf/tst-audit-tlsdesc.c b/elf/tst-audit-tlsdesc.c +new file mode 100644 +index 0000000000000000..3c8be81c95528f47 +--- /dev/null ++++ b/elf/tst-audit-tlsdesc.c +@@ -0,0 +1,60 @@ ++/* DT_AUDIT with modules with TLSDESC. ++ Copyright (C) 2021 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++#include ++ ++extern __thread int global1; ++extern __thread int global2; ++void *get_local1 (void); ++void set_global2 (int v); ++void *get_local2 (void); ++ ++static void * ++thr_func (void *clousure) ++{ ++ TEST_COMPARE (global1, 0); ++ ++global1; ++ TEST_COMPARE (global2, 0); ++ ++global2; ++ TEST_COMPARE (global2, 1); ++ ++ set_global2 (10); ++ TEST_COMPARE (global2, 10); ++ ++ int *local1 = get_local1 (); ++ TEST_COMPARE (*local1, 0); ++ ++*local1; ++ ++ int *local2 = get_local2 (); ++ TEST_COMPARE (*local2, 0); ++ ++*local2; ++ ++ return 0; ++} ++ ++static int ++do_test (void) ++{ ++ pthread_t thr = xpthread_create (NULL, thr_func, NULL); ++ void *r = xpthread_join (thr); ++ TEST_VERIFY (r == NULL); ++ return 0; ++} ++ ++#include +diff --git a/elf/tst-auditmod-tlsdesc.c b/elf/tst-auditmod-tlsdesc.c +new file mode 100644 +index 0000000000000000..e4b835d1f1fb6f73 +--- /dev/null ++++ b/elf/tst-auditmod-tlsdesc.c +@@ -0,0 +1,25 @@ ++/* DT_AUDIT with modules with TLSDESC. ++ Copyright (C) 2021 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. 
++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++ ++unsigned int ++la_version (unsigned int version) ++{ ++ return LAV_CURRENT; ++} diff --git a/SOURCES/glibc-upstream-2.34-152.patch b/SOURCES/glibc-upstream-2.34-152.patch new file mode 100644 index 0000000..e2c8172 --- /dev/null +++ b/SOURCES/glibc-upstream-2.34-152.patch @@ -0,0 +1,314 @@ +commit d1b9bee29a1c4e0b80db53f228e22550c3604894 +Author: Adhemerval Zanella +Date: Mon Jul 19 18:42:26 2021 -0300 + + elf: Issue audit la_objopen for vDSO + + The vDSO is is listed in the link_map chain, but is never the subject of + an la_objopen call. A new internal flag __RTLD_VDSO is added that + acts as __RTLD_OPENEXEC to allocate the required 'struct auditstate' + extra space for the 'struct link_map'. + + The return value from the callback is currently ignored, since there + is no PLT call involved by glibc when using the vDSO, neither the vDSO + are exported directly. + + Checked on x86_64-linux-gnu, i686-linux-gnu, and aarch64-linux-gnu. + + Reviewed-by: Florian Weimer + (cherry picked from commit f0e23d34a7bdf6b90fba954ee741419171ac41b2) + + Resolved conflicts: + elf/Makefile + +diff --git a/elf/Makefile b/elf/Makefile +index 29f545d2272bf6e2..465442bf59fa9794 100644 +--- a/elf/Makefile ++++ b/elf/Makefile +@@ -366,6 +366,7 @@ tests += \ + tst-audit17 \ + tst-audit18 \ + tst-audit19b \ ++ tst-audit22 \ + tst-auditmany \ + tst-auxobj \ + tst-auxobj-dlopen \ +@@ -649,6 +650,7 @@ modules-names = \ + tst-auditmod18 \ + tst-auditmod19a \ + tst-auditmod19b \ ++ tst-auditmod22 \ + tst-auxvalmod \ + tst-big-note-lib \ + tst-deep1mod1 \ +@@ -2035,6 +2037,9 @@ $(objpfx)tst-audit19b.out: $(objpfx)tst-auditmod19b.so + $(objpfx)tst-audit19b: $(objpfx)tst-audit19bmod.so + tst-audit19b-ARGS = -- $(host-test-program-cmd) + ++$(objpfx)tst-audit22.out: $(objpfx)tst-auditmod22.so ++tst-audit22-ARGS = -- $(host-test-program-cmd) ++ + # tst-sonamemove links against an older implementation of the library. + LDFLAGS-tst-sonamemove-linkmod1.so = \ + -Wl,--version-script=tst-sonamemove-linkmod1.map \ +diff --git a/elf/dl-object.c b/elf/dl-object.c +index 1875599eb274dc35..dee49a32d4fdf07d 100644 +--- a/elf/dl-object.c ++++ b/elf/dl-object.c +@@ -59,16 +59,19 @@ _dl_new_object (char *realname, const char *libname, int type, + { + #ifdef SHARED + unsigned int naudit; +- if (__glibc_unlikely ((mode & __RTLD_OPENEXEC) != 0)) ++ if (__glibc_unlikely ((mode & (__RTLD_OPENEXEC | __RTLD_VDSO)) != 0)) + { +- assert (type == lt_executable); +- assert (nsid == LM_ID_BASE); ++ if (mode & __RTLD_OPENEXEC) ++ { ++ assert (type == lt_executable); ++ assert (nsid == LM_ID_BASE); + +- /* Ignore the specified libname for the main executable. It is +- only known with an explicit loader invocation. */ +- libname = ""; ++ /* Ignore the specified libname for the main executable. It is ++ only known with an explicit loader invocation. */ ++ libname = ""; ++ } + +- /* We create the map for the executable before we know whether ++ /* We create the map for the executable and vDSO before we know whether + we have auditing libraries and if yes, how many. Assume the + worst. 
*/ + naudit = DL_NNS; +diff --git a/elf/rtld.c b/elf/rtld.c +index f632a767d7a269ef..b089e5cf4740443e 100644 +--- a/elf/rtld.c ++++ b/elf/rtld.c +@@ -1922,6 +1922,12 @@ dl_main (const ElfW(Phdr) *phdr, + assert (i == npreloads); + } + ++#ifdef NEED_DL_SYSINFO_DSO ++ /* Now that the audit modules are opened, call la_objopen for the vDSO. */ ++ if (GLRO(dl_sysinfo_map) != NULL) ++ _dl_audit_objopen (GLRO(dl_sysinfo_map), LM_ID_BASE); ++#endif ++ + /* Load all the libraries specified by DT_NEEDED entries. If LD_PRELOAD + specified some libraries to load, these are inserted before the actual + dependencies in the executable's searchlist for symbol resolution. */ +diff --git a/elf/setup-vdso.h b/elf/setup-vdso.h +index 3f20578046de76ed..2b013d974a377a83 100644 +--- a/elf/setup-vdso.h ++++ b/elf/setup-vdso.h +@@ -30,7 +30,7 @@ setup_vdso (struct link_map *main_map __attribute__ ((unused)), + We just want our data structures to describe it as if we had just + mapped and relocated it normally. */ + struct link_map *l = _dl_new_object ((char *) "", "", lt_library, NULL, +- 0, LM_ID_BASE); ++ __RTLD_VDSO, LM_ID_BASE); + if (__glibc_likely (l != NULL)) + { + l->l_phdr = ((const void *) GLRO(dl_sysinfo_dso) +diff --git a/elf/tst-audit22.c b/elf/tst-audit22.c +new file mode 100644 +index 0000000000000000..18fd22a760ddc3d8 +--- /dev/null ++++ b/elf/tst-audit22.c +@@ -0,0 +1,124 @@ ++/* Check DTAUDIT and vDSO interaction. ++ Copyright (C) 2021 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . 
*/ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++static int restart; ++#define CMDLINE_OPTIONS \ ++ { "restart", no_argument, &restart, 1 }, ++ ++static uintptr_t vdso_addr; ++ ++static int ++handle_restart (void) ++{ ++ fprintf (stderr, "vdso: %p\n", (void*) vdso_addr); ++ return 0; ++} ++ ++static uintptr_t ++parse_address (const char *str) ++{ ++ void *r; ++ TEST_COMPARE (sscanf (str, "%p\n", &r), 1); ++ return (uintptr_t) r; ++} ++ ++static inline bool ++startswith (const char *str, const char *pre) ++{ ++ size_t lenpre = strlen (pre); ++ size_t lenstr = strlen (str); ++ return lenstr >= lenpre && memcmp (pre, str, lenpre) == 0; ++} ++ ++static int ++do_test (int argc, char *argv[]) ++{ ++ vdso_addr = getauxval (AT_SYSINFO_EHDR); ++ if (vdso_addr == 0) ++ FAIL_UNSUPPORTED ("getauxval (AT_SYSINFO_EHDR) returned 0"); ++ ++ /* We must have either: ++ - One our fource parameters left if called initially: ++ + path to ld.so optional ++ + "--library-path" optional ++ + the library path optional ++ + the application name */ ++ if (restart) ++ return handle_restart (); ++ ++ char *spargv[9]; ++ int i = 0; ++ for (; i < argc - 1; i++) ++ spargv[i] = argv[i + 1]; ++ spargv[i++] = (char *) "--direct"; ++ spargv[i++] = (char *) "--restart"; ++ spargv[i] = NULL; ++ ++ setenv ("LD_AUDIT", "tst-auditmod22.so", 0); ++ struct support_capture_subprocess result ++ = support_capture_subprogram (spargv[0], spargv); ++ support_capture_subprocess_check (&result, "tst-audit22", 0, sc_allow_stderr); ++ ++ /* The respawned process should always print the vDSO address (otherwise it ++ will fails as unsupported). However, on some architectures the audit ++ module might see the vDSO with l_addr being 0, meaning a fixed mapping ++ (linux-gate.so). In this case we don't check its value against ++ AT_SYSINFO_EHDR one. */ ++ uintptr_t vdso_process = 0; ++ bool vdso_audit_found = false; ++ uintptr_t vdso_audit = 0; ++ ++ FILE *out = fmemopen (result.err.buffer, result.err.length, "r"); ++ TEST_VERIFY (out != NULL); ++ char *buffer = NULL; ++ size_t buffer_length = 0; ++ while (xgetline (&buffer, &buffer_length, out)) ++ { ++ if (startswith (buffer, "vdso: ")) ++ vdso_process = parse_address (buffer + strlen ("vdso: ")); ++ else if (startswith (buffer, "vdso found: ")) ++ { ++ vdso_audit = parse_address (buffer + strlen ("vdso found: ")); ++ vdso_audit_found = true; ++ } ++ } ++ ++ TEST_COMPARE (vdso_audit_found, true); ++ if (vdso_audit != 0) ++ TEST_COMPARE (vdso_process, vdso_audit); ++ ++ free (buffer); ++ xfclose (out); ++ ++ return 0; ++} ++ ++#define TEST_FUNCTION_ARGV do_test ++#include +diff --git a/elf/tst-auditmod22.c b/elf/tst-auditmod22.c +new file mode 100644 +index 0000000000000000..8e05ce8cbb215dd5 +--- /dev/null ++++ b/elf/tst-auditmod22.c +@@ -0,0 +1,51 @@ ++/* Check DTAUDIT and vDSO interaction. ++ Copyright (C) 2021 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. 
++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++static inline bool ++startswith (const char *str, const char *pre) ++{ ++ size_t lenpre = strlen (pre); ++ size_t lenstr = strlen (str); ++ return lenstr < lenpre ? false : memcmp (pre, str, lenpre) == 0; ++} ++ ++unsigned int ++la_version (unsigned int version) ++{ ++ return LAV_CURRENT; ++} ++ ++unsigned int ++la_objopen (struct link_map *map, Lmid_t lmid, uintptr_t *cookie) ++{ ++ /* The linux-gate.so is placed at a fixed address, thus l_addr being 0, ++ and it might be the value reported as the AT_SYSINFO_EHDR. */ ++ if (map->l_addr == 0 && startswith (map->l_name, "linux-gate.so")) ++ fprintf (stderr, "vdso found: %p\n", NULL); ++ else if (map->l_addr == getauxval (AT_SYSINFO_EHDR)) ++ fprintf (stderr, "vdso found: %p\n", (void*) map->l_addr); ++ ++ return 0; ++} +diff --git a/include/dlfcn.h b/include/dlfcn.h +index a4c283728f94deb4..e73294b0af587913 100644 +--- a/include/dlfcn.h ++++ b/include/dlfcn.h +@@ -12,6 +12,8 @@ + #define __RTLD_AUDIT 0x08000000 + #define __RTLD_SECURE 0x04000000 /* Apply additional security checks. */ + #define __RTLD_NOIFUNC 0x02000000 /* Suppress calling ifunc functions. */ ++#define __RTLD_VDSO 0x01000000 /* Tell _dl_new_object the object is ++ system-loaded. */ + + #define __LM_ID_CALLER -2 + diff --git a/SOURCES/glibc-upstream-2.34-153.patch b/SOURCES/glibc-upstream-2.34-153.patch new file mode 100644 index 0000000..2dba7b1 --- /dev/null +++ b/SOURCES/glibc-upstream-2.34-153.patch @@ -0,0 +1,167 @@ +commit 2255621f0e2cd07f7a6147928ce644e13526ffb6 +Author: Adhemerval Zanella +Date: Wed Jun 30 17:33:57 2021 -0300 + + elf: Do not fail for failed dlmopen on audit modules (BZ #28061) + + The dl_main sets the LM_ID_BASE to RT_ADD just before starting to + add load new shared objects. The state is set to RT_CONSISTENT just + after all objects are loaded. + + However if a audit modules tries to dlmopen an inexistent module, + the _dl_open will assert that the namespace is in an inconsistent + state. + + This is different than dlopen, since first it will not use + LM_ID_BASE and second _dl_map_object_from_fd is the sole responsible + to set and reset the r_state value. + + So the assert on _dl_open can not really be seen if the state is + consistent, since _dt_main resets it. This patch removes the assert. + + Checked on x86_64-linux-gnu, i686-linux-gnu, and aarch64-linux-gnu. 
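    The failure mode fixed here reduces to an audit module that probes
    for an optional object while the base namespace is still being set
    up.  A condensed sketch of the reproducer (the full version is the
    tst-auditmod20.c test added below):

    #define _GNU_SOURCE
    #include <dlfcn.h>
    #include <link.h>
    #include <stdlib.h>

    unsigned int
    la_version (unsigned int v)
    {
      return LAV_CURRENT;
    }

    void
    la_activity (uintptr_t *cookie, unsigned int flag)
    {
      if (flag != LA_ACT_CONSISTENT)
        return;
      /* Both lookups must fail cleanly by returning NULL; before this
         fix the dlmopen tripped the r_state assert in _dl_open and
         aborted the process.  */
      if (dlopen ("nonexistent.so", RTLD_NOW) != NULL)
        abort ();
      if (dlmopen (LM_ID_BASE, "nonexistent.so", RTLD_NOW) != NULL)
        abort ();
    }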
+ + Reviewed-by: Florian Weimer + (cherry picked from commit 484e672ddabe0a919a692520e6ac8f2580866235) + + Resolved conflicts: + elf/Makefile + elf/dl-open.c + +diff --git a/elf/Makefile b/elf/Makefile +index 465442bf59fa9794..91b2269257523a64 100644 +--- a/elf/Makefile ++++ b/elf/Makefile +@@ -366,6 +366,7 @@ tests += \ + tst-audit17 \ + tst-audit18 \ + tst-audit19b \ ++ tst-audit20 \ + tst-audit22 \ + tst-auditmany \ + tst-auxobj \ +@@ -650,6 +651,7 @@ modules-names = \ + tst-auditmod18 \ + tst-auditmod19a \ + tst-auditmod19b \ ++ tst-auditmod20 \ + tst-auditmod22 \ + tst-auxvalmod \ + tst-big-note-lib \ +@@ -2037,6 +2039,9 @@ $(objpfx)tst-audit19b.out: $(objpfx)tst-auditmod19b.so + $(objpfx)tst-audit19b: $(objpfx)tst-audit19bmod.so + tst-audit19b-ARGS = -- $(host-test-program-cmd) + ++$(objpfx)tst-audit20.out: $(objpfx)tst-auditmod20.so ++tst-audit20-ENV = LD_AUDIT=$(objpfx)tst-auditmod20.so ++ + $(objpfx)tst-audit22.out: $(objpfx)tst-auditmod22.so + tst-audit22-ARGS = -- $(host-test-program-cmd) + +diff --git a/elf/dl-open.c b/elf/dl-open.c +index 3f01aa480730da13..bc6872632880634e 100644 +--- a/elf/dl-open.c ++++ b/elf/dl-open.c +@@ -914,8 +914,6 @@ no more namespaces available for dlmopen()")); + the flag here. */ + } + +- assert (_dl_debug_initialize (0, args.nsid)->r_state == RT_CONSISTENT); +- + /* Release the lock. */ + __rtld_lock_unlock_recursive (GL(dl_load_lock)); + +diff --git a/elf/tst-audit20.c b/elf/tst-audit20.c +new file mode 100644 +index 0000000000000000..6f39ccee865b012b +--- /dev/null ++++ b/elf/tst-audit20.c +@@ -0,0 +1,25 @@ ++/* Check dlopen failure on audit modules. ++ Copyright (C) 2021 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++static int ++do_test (void) ++{ ++ return 0; ++} ++ ++#include +diff --git a/elf/tst-auditmod20.c b/elf/tst-auditmod20.c +new file mode 100644 +index 0000000000000000..c57e50ee4e88dd6b +--- /dev/null ++++ b/elf/tst-auditmod20.c +@@ -0,0 +1,57 @@ ++/* Check dlopen failure on audit modules. ++ Copyright (C) 2021 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . 
*/ ++ ++#include ++#include ++#include ++ ++unsigned int ++la_version (unsigned int v) ++{ ++ return LAV_CURRENT; ++} ++ ++static void ++check (void) ++{ ++ { ++ void *mod = dlopen ("nonexistent.so", RTLD_NOW); ++ if (mod != NULL) ++ abort (); ++ } ++ ++ { ++ void *mod = dlmopen (LM_ID_BASE, "nonexistent.so", RTLD_NOW); ++ if (mod != NULL) ++ abort (); ++ } ++} ++ ++void ++la_activity (uintptr_t *cookie, unsigned int flag) ++{ ++ if (flag != LA_ACT_CONSISTENT) ++ return; ++ check (); ++} ++ ++void ++la_preinit (uintptr_t *cookie) ++{ ++ check (); ++} diff --git a/SOURCES/glibc-upstream-2.34-154.patch b/SOURCES/glibc-upstream-2.34-154.patch new file mode 100644 index 0000000..15b64cf --- /dev/null +++ b/SOURCES/glibc-upstream-2.34-154.patch @@ -0,0 +1,434 @@ +commit 98047ba95caf9ed596908ca73a22070c5e27597b +Author: Adhemerval Zanella +Date: Mon Jan 24 10:46:15 2022 -0300 + + elf: Add la_activity during application exit + + la_activity is not called during application exit, even though + la_objclose is. + + Checked on x86_64-linux-gnu, i686-linux-gnu, and aarch64-linux-gnu. + + Reviewed-by: Carlos O'Donell + Tested-by: Carlos O'Donell + (cherry picked from commit 5fa11a2bc94c912c3b25860065086902674537ba) + +diff --git a/elf/Makefile b/elf/Makefile +index 91b2269257523a64..407aaeaeb8c84020 100644 +--- a/elf/Makefile ++++ b/elf/Makefile +@@ -368,6 +368,7 @@ tests += \ + tst-audit19b \ + tst-audit20 \ + tst-audit22 \ ++ tst-audit23 \ + tst-auditmany \ + tst-auxobj \ + tst-auxobj-dlopen \ +@@ -631,6 +632,7 @@ modules-names = \ + tst-audit13mod1 \ + tst-audit18mod \ + tst-audit19bmod \ ++ tst-audit23mod \ + tst-auditlogmod-1 \ + tst-auditlogmod-2 \ + tst-auditlogmod-3 \ +@@ -653,6 +655,7 @@ modules-names = \ + tst-auditmod19b \ + tst-auditmod20 \ + tst-auditmod22 \ ++ tst-auditmod23 \ + tst-auxvalmod \ + tst-big-note-lib \ + tst-deep1mod1 \ +@@ -2045,6 +2048,10 @@ tst-audit20-ENV = LD_AUDIT=$(objpfx)tst-auditmod20.so + $(objpfx)tst-audit22.out: $(objpfx)tst-auditmod22.so + tst-audit22-ARGS = -- $(host-test-program-cmd) + ++$(objpfx)tst-audit23.out: $(objpfx)tst-auditmod23.so \ ++ $(objpfx)tst-audit23mod.so ++tst-audit23-ARGS = -- $(host-test-program-cmd) ++ + # tst-sonamemove links against an older implementation of the library. + LDFLAGS-tst-sonamemove-linkmod1.so = \ + -Wl,--version-script=tst-sonamemove-linkmod1.map \ +diff --git a/elf/dl-fini.c b/elf/dl-fini.c +index b789cfb9f2ac6c85..fa876da0ffa1cf97 100644 +--- a/elf/dl-fini.c ++++ b/elf/dl-fini.c +@@ -64,6 +64,10 @@ _dl_fini (void) + __rtld_lock_unlock_recursive (GL(dl_load_lock)); + else + { ++#ifdef SHARED ++ _dl_audit_activity_nsid (ns, LA_ACT_DELETE); ++#endif ++ + /* Now we can allocate an array to hold all the pointers and + copy the pointers in. */ + struct link_map *maps[nloaded]; +@@ -153,6 +157,10 @@ _dl_fini (void) + /* Correct the previous increment. */ + --l->l_direct_opencount; + } ++ ++#ifdef SHARED ++ _dl_audit_activity_nsid (ns, LA_ACT_CONSISTENT); ++#endif + } + } + +diff --git a/elf/tst-audit23.c b/elf/tst-audit23.c +new file mode 100644 +index 0000000000000000..4904cf1340a97ee1 +--- /dev/null ++++ b/elf/tst-audit23.c +@@ -0,0 +1,239 @@ ++/* Check for expected la_objopen and la_objeclose for all objects. ++ Copyright (C) 2022 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. 
++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++static int restart; ++#define CMDLINE_OPTIONS \ ++ { "restart", no_argument, &restart, 1 }, ++ ++static int ++handle_restart (void) ++{ ++ xdlopen ("tst-audit23mod.so", RTLD_NOW); ++ xdlmopen (LM_ID_NEWLM, LIBC_SO, RTLD_NOW); ++ ++ return 0; ++} ++ ++static inline bool ++startswith (const char *str, const char *pre) ++{ ++ size_t lenpre = strlen (pre); ++ size_t lenstr = strlen (str); ++ return lenstr >= lenpre && memcmp (pre, str, lenpre) == 0; ++} ++ ++static inline bool ++is_vdso (const char *str) ++{ ++ return startswith (str, "linux-gate") ++ || startswith (str, "linux-vdso"); ++} ++ ++static int ++do_test (int argc, char *argv[]) ++{ ++ /* We must have either: ++ - One or four parameters left if called initially: ++ + path to ld.so optional ++ + "--library-path" optional ++ + the library path optional ++ + the application name */ ++ if (restart) ++ return handle_restart (); ++ ++ char *spargv[9]; ++ TEST_VERIFY_EXIT (((argc - 1) + 3) < array_length (spargv)); ++ int i = 0; ++ for (; i < argc - 1; i++) ++ spargv[i] = argv[i + 1]; ++ spargv[i++] = (char *) "--direct"; ++ spargv[i++] = (char *) "--restart"; ++ spargv[i] = NULL; ++ ++ setenv ("LD_AUDIT", "tst-auditmod23.so", 0); ++ struct support_capture_subprocess result ++ = support_capture_subprogram (spargv[0], spargv); ++ support_capture_subprocess_check (&result, "tst-audit22", 0, sc_allow_stderr); ++ ++ /* The expected la_objopen/la_objclose: ++ 1. executable ++ 2. loader ++ 3. libc.so ++ 4. tst-audit23mod.so ++ 5. libc.so (LM_ID_NEWLM). ++ 6. vdso (optional and ignored). */ ++ enum { max_objs = 6 }; ++ struct la_obj_t ++ { ++ char *lname; ++ uintptr_t laddr; ++ Lmid_t lmid; ++ bool closed; ++ } objs[max_objs] = { [0 ... max_objs-1] = { .closed = false } }; ++ size_t nobjs = 0; ++ ++ /* The expected namespaces are one for the audit module, one for the ++ application, and another for the dlmopen on handle_restart. */ ++ enum { max_ns = 3 }; ++ uintptr_t acts[max_ns] = { 0 }; ++ size_t nacts = 0; ++ int last_act = -1; ++ uintptr_t last_act_cookie = -1; ++ bool seen_first_objclose = false; ++ ++ FILE *out = fmemopen (result.err.buffer, result.err.length, "r"); ++ TEST_VERIFY (out != NULL); ++ char *buffer = NULL; ++ size_t buffer_length = 0; ++ while (xgetline (&buffer, &buffer_length, out)) ++ { ++ if (startswith (buffer, "la_activity: ")) ++ { ++ uintptr_t cookie; ++ int this_act; ++ int r = sscanf (buffer, "la_activity: %d %"SCNxPTR"", &this_act, ++ &cookie); ++ TEST_COMPARE (r, 2); ++ ++ /* The cookie identifies the object at the head of the link map, ++ so we only add a new namespace if it changes from the previous ++ one. This works since dlmopen is the last in the test body. 
++	  if (cookie != last_act_cookie && last_act_cookie != -1)
++	    TEST_COMPARE (last_act, LA_ACT_CONSISTENT);
++
++	  if (this_act == LA_ACT_ADD && acts[nacts] != cookie)
++	    {
++	      acts[nacts++] = cookie;
++	      last_act_cookie = cookie;
++	    }
++	  /* LA_ACT_DELETE is called in the reverse order of LA_ACT_ADD
++	     at program termination (if the test adds a dlclose or a library
++	     with extra dependencies this will need to be adapted).  */
++	  else if (this_act == LA_ACT_DELETE)
++	    {
++	      last_act_cookie = acts[--nacts];
++	      TEST_COMPARE (acts[nacts], cookie);
++	      acts[nacts] = 0;
++	    }
++	  else if (this_act == LA_ACT_CONSISTENT)
++	    {
++	      TEST_COMPARE (cookie, last_act_cookie);
++
++	      /* LA_ACT_DELETE must always be followed by an la_objclose.  */
++	      if (last_act == LA_ACT_DELETE)
++		TEST_COMPARE (seen_first_objclose, true);
++	      else
++		TEST_COMPARE (last_act, LA_ACT_ADD);
++	    }
++
++	  last_act = this_act;
++	  seen_first_objclose = false;
++	}
++      else if (startswith (buffer, "la_objopen: "))
++	{
++	  char *lname;
++	  uintptr_t laddr;
++	  Lmid_t lmid;
++	  uintptr_t cookie;
++	  int r = sscanf (buffer, "la_objopen: %"SCNxPTR" %ms %"SCNxPTR" %ld",
++			  &cookie, &lname, &laddr, &lmid);
++	  TEST_COMPARE (r, 4);
++
++	  /* la_objclose is not triggered by the vDSO because glibc does not
++	     unload it.  */
++	  if (is_vdso (lname))
++	    continue;
++	  if (nobjs == max_objs)
++	    FAIL_EXIT1 ("unexpected la_objopen: %s %"PRIxPTR" %ld",
++			lname, laddr, lmid);
++	  objs[nobjs].lname = lname;
++	  objs[nobjs].laddr = laddr;
++	  objs[nobjs].lmid = lmid;
++	  objs[nobjs].closed = false;
++	  nobjs++;
++
++	  /* This indirectly checks that la_objopen always comes before
++	     la_objclose between la_activity calls.  */
++	  seen_first_objclose = false;
++	}
++      else if (startswith (buffer, "la_objclose: "))
++	{
++	  char *lname;
++	  uintptr_t laddr;
++	  Lmid_t lmid;
++	  uintptr_t cookie;
++	  int r = sscanf (buffer, "la_objclose: %"SCNxPTR" %ms %"SCNxPTR" %ld",
++			  &cookie, &lname, &laddr, &lmid);
++	  TEST_COMPARE (r, 4);
++
++	  for (size_t i = 0; i < nobjs; i++)
++	    {
++	      if (strcmp (lname, objs[i].lname) == 0 && lmid == objs[i].lmid)
++		{
++		  TEST_COMPARE (objs[i].closed, false);
++		  objs[i].closed = true;
++		  break;
++		}
++	    }
++
++	  /* la_objclose should be called after la_activity (LA_ACT_DELETE)
++	     for the closed object's namespace.  */
++	  TEST_COMPARE (last_act, LA_ACT_DELETE);
++	  if (!seen_first_objclose)
++	    {
++	      TEST_COMPARE (last_act_cookie, cookie);
++	      seen_first_objclose = true;
++	    }
++	}
++    }
++
++  for (size_t i = 0; i < nobjs; i++)
++    {
++      TEST_COMPARE (objs[i].closed, true);
++      free (objs[i].lname);
++    }
++
++  /* la_activity (LA_ACT_CONSISTENT) should be the last callback received.
++     Since only one link map may be not-CONSISTENT at a time, this also
++     ensures la_activity (LA_ACT_CONSISTENT) is the last callback received
++     for every namespace.  */
++  TEST_COMPARE (last_act, LA_ACT_CONSISTENT);
++
++  free (buffer);
++  xfclose (out);
++
++  return 0;
++}
++
++#define TEST_FUNCTION_ARGV do_test
++#include <support/test-driver.c>
+diff --git a/elf/tst-audit23mod.c b/elf/tst-audit23mod.c
+new file mode 100644
+index 0000000000000000..30315687037d25e8
+--- /dev/null
++++ b/elf/tst-audit23mod.c
+@@ -0,0 +1,23 @@
++/* Extra module for tst-audit23.
++   Copyright (C) 2022 Free Software Foundation, Inc.
++   This file is part of the GNU C Library.
++
++   The GNU C Library is free software; you can redistribute it and/or
++   modify it under the terms of the GNU Lesser General Public
++   License as published by the Free Software Foundation; either
++   version 2.1 of the License, or (at your option) any later version.
++
++   The GNU C Library is distributed in the hope that it will be useful,
++   but WITHOUT ANY WARRANTY; without even the implied warranty of
++   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++   Lesser General Public License for more details.
++
++   You should have received a copy of the GNU Lesser General Public
++   License along with the GNU C Library; if not, see
++   <https://www.gnu.org/licenses/>.  */
++
++int
++foo (void)
++{
++  return 0;
++}
+diff --git a/elf/tst-auditmod23.c b/elf/tst-auditmod23.c
+new file mode 100644
+index 0000000000000000..d7c60d7a5cbc4f8a
+--- /dev/null
++++ b/elf/tst-auditmod23.c
+@@ -0,0 +1,74 @@
++/* Audit module loaded by tst-audit23.
++   Copyright (C) 2022 Free Software Foundation, Inc.
++   This file is part of the GNU C Library.
++
++   The GNU C Library is free software; you can redistribute it and/or
++   modify it under the terms of the GNU Lesser General Public
++   License as published by the Free Software Foundation; either
++   version 2.1 of the License, or (at your option) any later version.
++
++   The GNU C Library is distributed in the hope that it will be useful,
++   but WITHOUT ANY WARRANTY; without even the implied warranty of
++   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++   Lesser General Public License for more details.
++
++   You should have received a copy of the GNU Lesser General Public
++   License along with the GNU C Library; if not, see
++   <https://www.gnu.org/licenses/>.  */
++
++#include <link.h>
++#include <inttypes.h>
++#include <stddef.h>
++#include <stdio.h>
++#include <stdlib.h>
++#include <string.h>
++
++unsigned int
++la_version (unsigned int version)
++{
++  return LAV_CURRENT;
++}
++
++struct map_desc_t
++{
++  char *lname;
++  uintptr_t laddr;
++  Lmid_t lmid;
++};
++
++void
++la_activity (uintptr_t *cookie, unsigned int flag)
++{
++  fprintf (stderr, "%s: %d %"PRIxPTR"\n", __func__, flag, (uintptr_t) cookie);
++}
++
++unsigned int
++la_objopen (struct link_map *map, Lmid_t lmid, uintptr_t *cookie)
++{
++  const char *l_name = map->l_name[0] == '\0' ? "mainapp" : map->l_name;
++  fprintf (stderr, "%s: %"PRIxPTR" %s %"PRIxPTR" %ld\n", __func__,
++	   (uintptr_t) cookie, l_name, map->l_addr, lmid);
++
++  struct map_desc_t *map_desc = malloc (sizeof (struct map_desc_t));
++  if (map_desc == NULL)
++    abort ();
++
++  map_desc->lname = strdup (l_name);
++  map_desc->laddr = map->l_addr;
++  map_desc->lmid = lmid;
++
++  *cookie = (uintptr_t) map_desc;
++
++  return 0;
++}
++
++unsigned int
++la_objclose (uintptr_t *cookie)
++{
++  struct map_desc_t *map_desc = (struct map_desc_t *) *cookie;
++  fprintf (stderr, "%s: %"PRIxPTR" %s %"PRIxPTR" %ld\n", __func__,
++	   (uintptr_t) cookie, map_desc->lname, map_desc->laddr,
++	   map_desc->lmid);
++
++  return 0;
++}
diff --git a/SOURCES/glibc-upstream-2.34-155.patch b/SOURCES/glibc-upstream-2.34-155.patch
new file mode 100644
index 0000000..bb24edf
--- /dev/null
+++ b/SOURCES/glibc-upstream-2.34-155.patch
@@ -0,0 +1,299 @@
+commit efb21b5fb27fbad447d9f242436fb591870f0045
+Author: Adhemerval Zanella <adhemerval.zanella@linaro.org>
+Date:   Mon Jan 24 10:46:16 2022 -0300
+
+    elf: Fix initial-exec TLS access on audit modules (BZ #28096)
+
+    For audit modules and dependencies with initial-exec TLS, we cannot
+    set the initial TLS image on default loader initialization because it
+    would already be set by the audit setup.  However, subsequent thread
+    creation would need to follow the default behaviour.
+
+    This patch fixes it by setting the l_auditing link_map field not only
+    for the audit modules, but also for all their dependencies.  This is
+    used on _dl_allocate_tls_init to avoid the static TLS initialization
+    at load time.
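[To make the failure mode concrete: an audit module is loaded into its own link namespace (outside LM_ID_BASE) and may write to initial-exec TLS variables while the loader is still bootstrapping, before dl_main performs the static TLS initialization for the main thread. What follows is a minimal sketch of such an auditor, not part of the patch; the file name and build line are hypothetical. Before this fix, the loader's later copy of the TLS initialization image could silently discard the store made in la_version for the initial thread, while newly created threads behaved as expected.]

/* ie-audit.c (hypothetical): build with
     gcc -shared -fPIC -o ie-audit.so ie-audit.c
   and run any program with LD_AUDIT=./ie-audit.so.  */
#include <link.h>
#include <stdint.h>
#include <stdlib.h>

static __thread int counter __attribute__ ((tls_model ("initial-exec")));

unsigned int
la_version (unsigned int version)
{
  /* Runs during audit setup, before the main thread's static TLS is
     (re)initialized by dl_main.  */
  counter = 42;
  return LAV_CURRENT;
}

void
la_preinit (uintptr_t *cookie)
{
  /* Without the fix, the loader re-copied the TLS initialization image
     over the audit module's namespace, so counter read back as 0 here
     instead of 42.  */
  if (counter != 42)
    abort ();
}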
+ + Checked on x86_64-linux-gnu, i686-linux-gnu, and aarch64-linux-gnu. + + Reviewed-by: Carlos O'Donell + Tested-by: Carlos O'Donell + (cherry picked from commit 254d3d5aef2fd8430c469e1938209ac100ebf132) + +diff --git a/elf/Makefile b/elf/Makefile +index 407aaeaeb8c84020..3ccf78f62985e2d0 100644 +--- a/elf/Makefile ++++ b/elf/Makefile +@@ -367,6 +367,7 @@ tests += \ + tst-audit18 \ + tst-audit19b \ + tst-audit20 \ ++ tst-audit21 \ + tst-audit22 \ + tst-audit23 \ + tst-auditmany \ +@@ -654,6 +655,8 @@ modules-names = \ + tst-auditmod19a \ + tst-auditmod19b \ + tst-auditmod20 \ ++ tst-auditmod21a \ ++ tst-auditmod21b \ + tst-auditmod22 \ + tst-auditmod23 \ + tst-auxvalmod \ +@@ -2045,6 +2048,11 @@ tst-audit19b-ARGS = -- $(host-test-program-cmd) + $(objpfx)tst-audit20.out: $(objpfx)tst-auditmod20.so + tst-audit20-ENV = LD_AUDIT=$(objpfx)tst-auditmod20.so + ++$(objpfx)tst-audit21: $(shared-thread-library) ++$(objpfx)tst-audit21.out: $(objpfx)tst-auditmod21a.so ++$(objpfx)tst-auditmod21a.so: $(objpfx)tst-auditmod21b.so ++tst-audit21-ENV = LD_AUDIT=$(objpfx)tst-auditmod21a.so ++ + $(objpfx)tst-audit22.out: $(objpfx)tst-auditmod22.so + tst-audit22-ARGS = -- $(host-test-program-cmd) + +diff --git a/elf/dl-tls.c b/elf/dl-tls.c +index e2012d0cd515103b..fab6546e2d31edd4 100644 +--- a/elf/dl-tls.c ++++ b/elf/dl-tls.c +@@ -519,8 +519,12 @@ _dl_resize_dtv (dtv_t *dtv, size_t max_modid) + } + + ++/* Allocate initial TLS. RESULT should be a non-NULL pointer to storage ++ for the TLS space. The DTV may be resized, and so this function may ++ call malloc to allocate that space. The loader's GL(dl_load_tls_lock) ++ is taken when manipulating global TLS-related data in the loader. */ + void * +-_dl_allocate_tls_init (void *result) ++_dl_allocate_tls_init (void *result, bool init_tls) + { + if (result == NULL) + /* The memory allocation failed. */ +@@ -593,7 +597,14 @@ _dl_allocate_tls_init (void *result) + some platforms use in static programs requires it. */ + dtv[map->l_tls_modid].pointer.val = dest; + +- /* Copy the initialization image and clear the BSS part. */ ++ /* Copy the initialization image and clear the BSS part. For ++ audit modules or dependencies with initial-exec TLS, we can not ++ set the initial TLS image on default loader initialization ++ because it would already be set by the audit setup. However, ++ subsequent thread creation would need to follow the default ++ behaviour. */ ++ if (map->l_ns != LM_ID_BASE && !init_tls) ++ continue; + memset (__mempcpy (dest, map->l_tls_initimage, + map->l_tls_initimage_size), '\0', + map->l_tls_blocksize - map->l_tls_initimage_size); +@@ -620,7 +631,7 @@ _dl_allocate_tls (void *mem) + { + return _dl_allocate_tls_init (mem == NULL + ? _dl_allocate_tls_storage () +- : allocate_dtv (mem)); ++ : allocate_dtv (mem), true); + } + rtld_hidden_def (_dl_allocate_tls) + +diff --git a/elf/rtld.c b/elf/rtld.c +index b089e5cf4740443e..26c6fb6479c9008c 100644 +--- a/elf/rtld.c ++++ b/elf/rtld.c +@@ -2429,7 +2429,7 @@ dl_main (const ElfW(Phdr) *phdr, + into the main thread's TLS area, which we allocated above. + Note: thread-local variables must only be accessed after completing + the next step. */ +- _dl_allocate_tls_init (tcbp); ++ _dl_allocate_tls_init (tcbp, false); + + /* And finally install it for the main thread. */ + if (! tls_init_tp_called) +diff --git a/elf/tst-audit21.c b/elf/tst-audit21.c +new file mode 100644 +index 0000000000000000..3a47ab64d44421ee +--- /dev/null ++++ b/elf/tst-audit21.c +@@ -0,0 +1,42 @@ ++/* Check LD_AUDIT with static TLS. 
++   Copyright (C) 2022 Free Software Foundation, Inc.
++   This file is part of the GNU C Library.
++
++   The GNU C Library is free software; you can redistribute it and/or
++   modify it under the terms of the GNU Lesser General Public
++   License as published by the Free Software Foundation; either
++   version 2.1 of the License, or (at your option) any later version.
++
++   The GNU C Library is distributed in the hope that it will be useful,
++   but WITHOUT ANY WARRANTY; without even the implied warranty of
++   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++   Lesser General Public License for more details.
++
++   You should have received a copy of the GNU Lesser General Public
++   License along with the GNU C Library; if not, see
++   <https://www.gnu.org/licenses/>.  */
++
++#include <ctype.h>
++#include <support/check.h>
++#include <support/xthread.h>
++
++static volatile __thread int out __attribute__ ((tls_model ("initial-exec")));
++
++static void *
++tf (void *arg)
++{
++  TEST_COMPARE (out, 0);
++  out = isspace (' ');
++  return NULL;
++}
++
++int
++main (int argc, char *argv[])
++{
++  TEST_COMPARE (out, 0);
++  out = isspace (' ');
++
++  pthread_t t = xpthread_create (NULL, tf, NULL);
++  xpthread_join (t);
++
++  return 0;
++}
+diff --git a/elf/tst-auditmod21a.c b/elf/tst-auditmod21a.c
+new file mode 100644
+index 0000000000000000..f6d51b5c0531c49d
+--- /dev/null
++++ b/elf/tst-auditmod21a.c
+@@ -0,0 +1,80 @@
++/* Check LD_AUDIT with static TLS.
++   Copyright (C) 2022 Free Software Foundation, Inc.
++   This file is part of the GNU C Library.
++
++   The GNU C Library is free software; you can redistribute it and/or
++   modify it under the terms of the GNU Lesser General Public
++   License as published by the Free Software Foundation; either
++   version 2.1 of the License, or (at your option) any later version.
++
++   The GNU C Library is distributed in the hope that it will be useful,
++   but WITHOUT ANY WARRANTY; without even the implied warranty of
++   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++   Lesser General Public License for more details.
++
++   You should have received a copy of the GNU Lesser General Public
++   License along with the GNU C Library; if not, see
++   <https://www.gnu.org/licenses/>.  */
++
++#include <ctype.h>
++#include <link.h>
++#include <stdlib.h>
++
++#define tls_ie __attribute__ ((tls_model ("initial-exec")))
++
++__thread int tls_var0 tls_ie;
++__thread int tls_var1 tls_ie = 0x10;
++
++/* Defined in tst-auditmod21b.so.  */
++extern __thread int tls_var2;
++extern __thread int tls_var3;
++
++static volatile int out;
++
++static void
++call_libc (void)
++{
++  /* isspace accesses the initial-exec glibc TLS variables, which are
++     set up in glibc initialization.  */
++  out = isspace (' ');
++}
++
++unsigned int
++la_version (unsigned int v)
++{
++  tls_var0 = 0x1;
++  if (tls_var1 != 0x10)
++    abort ();
++  tls_var1 = 0x20;
++
++  tls_var2 = 0x2;
++  if (tls_var3 != 0x20)
++    abort ();
++  tls_var3 = 0x40;
++
++  call_libc ();
++
++  return LAV_CURRENT;
++}
++
++unsigned int
++la_objopen (struct link_map *map, Lmid_t lmid, uintptr_t *cookie)
++{
++  call_libc ();
++  *cookie = (uintptr_t) map;
++  return 0;
++}
++
++void
++la_activity (uintptr_t *cookie, unsigned int flag)
++{
++  if (tls_var0 != 0x1 || tls_var1 != 0x20)
++    abort ();
++  call_libc ();
++}
++
++void
++la_preinit (uintptr_t *cookie)
++{
++  call_libc ();
++}
+diff --git a/elf/tst-auditmod21b.c b/elf/tst-auditmod21b.c
+new file mode 100644
+index 0000000000000000..6ba5335b7514c674
+--- /dev/null
++++ b/elf/tst-auditmod21b.c
+@@ -0,0 +1,22 @@
++/* Check LD_AUDIT with static TLS.
++   Copyright (C) 2022 Free Software Foundation, Inc.
++   This file is part of the GNU C Library.
++
++   The GNU C Library is free software; you can redistribute it and/or
++   modify it under the terms of the GNU Lesser General Public
++   License as published by the Free Software Foundation; either
++   version 2.1 of the License, or (at your option) any later version.
++
++   The GNU C Library is distributed in the hope that it will be useful,
++   but WITHOUT ANY WARRANTY; without even the implied warranty of
++   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++   Lesser General Public License for more details.
++
++   You should have received a copy of the GNU Lesser General Public
++   License along with the GNU C Library; if not, see
++   <https://www.gnu.org/licenses/>.  */
++
++#define tls_ie __attribute__ ((tls_model ("initial-exec")))
++
++__thread int tls_var2 tls_ie;
++__thread int tls_var3 tls_ie = 0x20;
+diff --git a/nptl/allocatestack.c b/nptl/allocatestack.c
+index 50065bc9bd8a28e5..554a721f814b53c4 100644
+--- a/nptl/allocatestack.c
++++ b/nptl/allocatestack.c
+@@ -139,7 +139,7 @@ get_cached_stack (size_t *sizep, void **memp)
+   memset (dtv, '\0', (dtv[-1].counter + 1) * sizeof (dtv_t));
+ 
+   /* Re-initialize the TLS.  */
+-  _dl_allocate_tls_init (TLS_TPADJ (result));
++  _dl_allocate_tls_init (TLS_TPADJ (result), true);
+ 
+   return result;
+ }
+diff --git a/sysdeps/generic/ldsodefs.h b/sysdeps/generic/ldsodefs.h
+index 686f0a7b9709eb10..a56060d0204cc453 100644
+--- a/sysdeps/generic/ldsodefs.h
++++ b/sysdeps/generic/ldsodefs.h
+@@ -1254,7 +1254,7 @@ extern void _dl_allocate_static_tls (struct link_map *map) attribute_hidden;
+ /* These are internal entry points to the two halves of _dl_allocate_tls,
+    only used within rtld.c itself at startup time.  */
+ extern void *_dl_allocate_tls_storage (void) attribute_hidden;
+-extern void *_dl_allocate_tls_init (void *);
++extern void *_dl_allocate_tls_init (void *, bool);
+ rtld_hidden_proto (_dl_allocate_tls_init)
+ 
+ /* Deallocate memory allocated with _dl_allocate_tls.  */
diff --git a/SOURCES/glibc-upstream-2.34-156.patch b/SOURCES/glibc-upstream-2.34-156.patch
new file mode 100644
index 0000000..528b157
--- /dev/null
+++ b/SOURCES/glibc-upstream-2.34-156.patch
@@ -0,0 +1,1778 @@
+commit 056fc1c0e367b9682d89f60fd5c249f80074ff30
+Author: Adhemerval Zanella <adhemerval.zanella@linaro.org>
+Date:   Mon Jan 24 10:46:17 2022 -0300
+
+    elf: Issue la_symbind for bind-now (BZ #23734)
+
+    The audit la_symbind callback was not called for binaries built with
+    -Wl,-z,now or run with LD_BIND_NOW=1.  This patch issues la_symbind
+    in that case as well; the PLT tracking callbacks (plt_enter and
+    plt_exit) are still skipped, since calling them would change the
+    expected program semantics (where no PLT is expected) and would have
+    performance implications (such as for BZ#15533).
+
+    LAV_CURRENT is also bumped to indicate the audit ABI change (the
+    la_symbind flags are set by the loader to indicate that no PLT trace
+    is possible).
+
+    To handle the powerpc64 ELFv1 function descriptor, _dl_audit_symbind
+    needs to know whether bind-now is used so that the symbol value is
+    updated to the function text segment instead of the OPD (for lazy
+    binding this is done by PPC64_LOAD_FUNCPTR on _dl_runtime_resolve).
+
+    Checked on x86_64-linux-gnu, i686-linux-gnu, aarch64-linux-gnu,
+    powerpc64-linux-gnu.
+
+    Reviewed-by: Carlos O'Donell <carlos@redhat.com>
+    Tested-by: Carlos O'Donell <carlos@redhat.com>
+    (cherry picked from commit 32612615c58b394c3eb09f020f31310797ad3854)
+
+    Resolved conflicts:
+        NEWS - Manual merge.
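[Before the diff body, it is worth spelling out what the ABI bump means for auditor authors: with this change la_symbind now fires for bind-now binaries, but the loader presets LA_SYMB_NOPLTENTER | LA_SYMB_NOPLTEXIT in *flags to signal that la_pltenter/la_pltexit will never run for that symbol. Below is a minimal sketch of an auditor written against the new LAV_CURRENT == 2 semantics; the file name is hypothetical and the 64-bit entry point is shown (32-bit targets define la_symbind32 instead).]

/* bindnow-audit.c (hypothetical): build with
     gcc -shared -fPIC -o bindnow-audit.so bindnow-audit.c
   then run: LD_AUDIT=./bindnow-audit.so ./some-program.  */
#define _GNU_SOURCE 1
#include <link.h>
#include <stdint.h>
#include <stdio.h>

unsigned int
la_version (unsigned int version)
{
  return LAV_CURRENT;		/* 2 after this change.  */
}

unsigned int
la_objopen (struct link_map *map, Lmid_t lmid, uintptr_t *cookie)
{
  *cookie = (uintptr_t) map;
  /* Request symbol-binding notifications for this object.  */
  return LA_FLG_BINDFROM | LA_FLG_BINDTO;
}

uintptr_t
la_symbind64 (Elf64_Sym *sym, unsigned int ndx, uintptr_t *refcook,
	      uintptr_t *defcook, unsigned int *flags, const char *symname)
{
  /* For symbols resolved at load time (bind-now) the loader now sets
     these bits itself: no PLT tracing will happen for them.  */
  if ((*flags & (LA_SYMB_NOPLTENTER | LA_SYMB_NOPLTEXIT)) != 0)
    fprintf (stderr, "%s: bound at load time, no PLT tracing\n", symname);
  return sym->st_value;		/* Keep the original binding.  */
}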
+ +diff --git a/bits/link_lavcurrent.h b/bits/link_lavcurrent.h +index 44fbea1e8060997f..c48835d12b512355 100644 +--- a/bits/link_lavcurrent.h ++++ b/bits/link_lavcurrent.h +@@ -22,4 +22,4 @@ + #endif + + /* Version numbers for la_version handshake interface. */ +-#define LAV_CURRENT 1 ++#define LAV_CURRENT 2 +diff --git a/elf/Makefile b/elf/Makefile +index 3ccf78f62985e2d0..0ab3e885f5e35671 100644 +--- a/elf/Makefile ++++ b/elf/Makefile +@@ -370,6 +370,12 @@ tests += \ + tst-audit21 \ + tst-audit22 \ + tst-audit23 \ ++ tst-audit24a \ ++ tst-audit24b \ ++ tst-audit24c \ ++ tst-audit24d \ ++ tst-audit25a \ ++ tst-audit25b \ + tst-auditmany \ + tst-auxobj \ + tst-auxobj-dlopen \ +@@ -634,6 +640,18 @@ modules-names = \ + tst-audit18mod \ + tst-audit19bmod \ + tst-audit23mod \ ++ tst-audit24amod1 \ ++ tst-audit24amod2 \ ++ tst-audit24bmod1 \ ++ tst-audit24bmod2 \ ++ tst-audit24dmod1 \ ++ tst-audit24dmod2 \ ++ tst-audit24dmod3 \ ++ tst-audit24dmod4 \ ++ tst-audit25mod1 \ ++ tst-audit25mod2 \ ++ tst-audit25mod3 \ ++ tst-audit25mod4 \ + tst-auditlogmod-1 \ + tst-auditlogmod-2 \ + tst-auditlogmod-3 \ +@@ -659,6 +677,11 @@ modules-names = \ + tst-auditmod21b \ + tst-auditmod22 \ + tst-auditmod23 \ ++ tst-auditmod24a \ ++ tst-auditmod24b \ ++ tst-auditmod24c \ ++ tst-auditmod24d \ ++ tst-auditmod25 \ + tst-auxvalmod \ + tst-big-note-lib \ + tst-deep1mod1 \ +@@ -864,7 +887,8 @@ extra-test-objs += $(addsuffix .os,$(strip $(modules-names))) + + # filtmod1.so, tst-big-note-lib.so, tst-ro-dynamic-mod.so have special + # rules. +-modules-names-nobuild := filtmod1 tst-big-note-lib tst-ro-dynamic-mod ++modules-names-nobuild := filtmod1 tst-big-note-lib tst-ro-dynamic-mod \ ++ tst-audit24bmod1 tst-audit24bmod2.so + + tests += $(tests-static) + +@@ -2060,6 +2084,69 @@ $(objpfx)tst-audit23.out: $(objpfx)tst-auditmod23.so \ + $(objpfx)tst-audit23mod.so + tst-audit23-ARGS = -- $(host-test-program-cmd) + ++$(objpfx)tst-audit24a.out: $(objpfx)tst-auditmod24a.so ++$(objpfx)tst-audit24a: $(objpfx)tst-audit24amod1.so \ ++ $(objpfx)tst-audit24amod2.so ++tst-audit24a-ENV = LD_AUDIT=$(objpfx)tst-auditmod24a.so ++LDFLAGS-tst-audit24a = -Wl,-z,now ++ ++$(objpfx)tst-audit24b.out: $(objpfx)tst-auditmod24b.so ++$(objpfx)tst-audit24b: $(objpfx)tst-audit24bmod1.so \ ++ $(objpfx)tst-audit24bmod2.so ++$(objpfx)tst-audit24bmod1: $(objpfx)tst-audit24bmod2.so ++# The test checks if a library without .gnu.version correctly calls the ++# audit callbacks. So it uses an explicit link rule to avoid linking ++# against libc.so. 
++$(objpfx)tst-audit24bmod1.so: $(objpfx)tst-audit24bmod1.os ++ $(CC) -nostdlib -nostartfiles -shared -o $@.new $(objpfx)tst-audit24bmod1.os \ ++ -Wl,-z,now ++ $(call after-link,$@.new) ++ mv -f $@.new $@ ++CFLAGS-.os += $(call elide-stack-protector,.os,tst-audit24bmod1) ++$(objpfx)tst-audit24bmod2.so: $(objpfx)tst-audit24bmod2.os ++ $(CC) -nostdlib -nostartfiles -shared -o $@.new $(objpfx)tst-audit24bmod2.os ++ $(call after-link,$@.new) ++ mv -f $@.new $@ ++CFLAGS-.os += $(call elide-stack-protector,.os,tst-audit24bmod2) ++tst-audit24b-ENV = LD_AUDIT=$(objpfx)tst-auditmod24b.so ++LDFLAGS-tst-audit24b = -Wl,-z,now ++ ++# Same as tst-audit24a, but tests LD_BIND_NOW ++$(objpfx)tst-audit24c.out: $(objpfx)tst-auditmod24c.so ++$(objpfx)tst-audit24c: $(objpfx)tst-audit24amod1.so \ ++ $(objpfx)tst-audit24amod2.so ++tst-audit24c-ENV = LD_BIND_NOW=1 LD_AUDIT=$(objpfx)tst-auditmod24c.so ++LDFLAGS-tst-audit24b = -Wl,-z,lazy ++ ++$(objpfx)tst-audit24d.out: $(objpfx)tst-auditmod24d.so ++$(objpfx)tst-audit24d: $(objpfx)tst-audit24dmod1.so \ ++ $(objpfx)tst-audit24dmod2.so ++$(objpfx)tst-audit24dmod1.so: $(objpfx)tst-audit24dmod3.so ++LDFLAGS-tst-audit24dmod1.so = -Wl,-z,now ++$(objpfx)tst-audit24dmod2.so: $(objpfx)tst-audit24dmod4.so ++LDFLAGS-tst-audit24dmod2.so = -Wl,-z,lazy ++tst-audit24d-ENV = LD_AUDIT=$(objpfx)tst-auditmod24d.so ++LDFLAGS-tst-audit24d = -Wl,-z,lazy ++ ++$(objpfx)tst-audit25a.out: $(objpfx)tst-auditmod25.so ++$(objpfx)tst-audit25a: $(objpfx)tst-audit25mod1.so \ ++ $(objpfx)tst-audit25mod2.so \ ++ $(objpfx)tst-audit25mod3.so \ ++ $(objpfx)tst-audit25mod4.so ++$(objpfx)tst-audit25mod1.so: $(objpfx)tst-audit25mod3.so ++LDFLAGS-tst-audit25mod1.so = -Wl,-z,now ++$(objpfx)tst-audit25mod2.so: $(objpfx)tst-audit25mod4.so ++LDFLAGS-tst-audit25mod2.so = -Wl,-z,lazy ++tst-audit25a-ARGS = -- $(host-test-program-cmd) ++ ++$(objpfx)tst-audit25b.out: $(objpfx)tst-auditmod25.so ++$(objpfx)tst-audit25b: $(objpfx)tst-audit25mod1.so \ ++ $(objpfx)tst-audit25mod2.so \ ++ $(objpfx)tst-audit25mod3.so \ ++ $(objpfx)tst-audit25mod4.so ++LDFLAGS-tst-audit25b = -Wl,-z,now ++tst-audit25b-ARGS = -- $(host-test-program-cmd) ++ + # tst-sonamemove links against an older implementation of the library. + LDFLAGS-tst-sonamemove-linkmod1.so = \ + -Wl,--version-script=tst-sonamemove-linkmod1.map \ +diff --git a/elf/dl-audit.c b/elf/dl-audit.c +index 152712b12fed6de2..72a50717ef60a357 100644 +--- a/elf/dl-audit.c ++++ b/elf/dl-audit.c +@@ -178,16 +178,23 @@ _dl_audit_symbind (struct link_map *l, struct reloc_result *reloc_result, + const ElfW(Sym) *defsym, DL_FIXUP_VALUE_TYPE *value, + lookup_t result) + { +- reloc_result->bound = result; +- /* Compute index of the symbol entry in the symbol table of the DSO with the +- definition. */ +- reloc_result->boundndx = (defsym - (ElfW(Sym) *) D_PTR (result, +- l_info[DT_SYMTAB])); ++ bool for_jmp_slot = reloc_result == NULL; ++ ++ /* Compute index of the symbol entry in the symbol table of the DSO ++ with the definition. */ ++ unsigned int boundndx = defsym - (ElfW(Sym) *) D_PTR (result, ++ l_info[DT_SYMTAB]); ++ if (!for_jmp_slot) ++ { ++ reloc_result->bound = result; ++ reloc_result->boundndx = boundndx; ++ } + + if ((l->l_audit_any_plt | result->l_audit_any_plt) == 0) + { + /* Set all bits since this symbol binding is not interesting. 
*/ +- reloc_result->enterexit = (1u << DL_NNS) - 1; ++ if (!for_jmp_slot) ++ reloc_result->enterexit = (1u << DL_NNS) - 1; + return; + } + +@@ -199,12 +206,13 @@ _dl_audit_symbind (struct link_map *l, struct reloc_result *reloc_result, + two bits. */ + assert (DL_NNS * 2 <= sizeof (reloc_result->flags) * 8); + assert ((LA_SYMB_NOPLTENTER | LA_SYMB_NOPLTEXIT) == 3); +- reloc_result->enterexit = LA_SYMB_NOPLTENTER | LA_SYMB_NOPLTEXIT; ++ uint32_t enterexit = LA_SYMB_NOPLTENTER | LA_SYMB_NOPLTEXIT; + + const char *strtab2 = (const void *) D_PTR (result, l_info[DT_STRTAB]); + + unsigned int flags = 0; + struct audit_ifaces *afct = GLRO(dl_audit); ++ uintptr_t new_value = (uintptr_t) sym.st_value; + for (unsigned int cnt = 0; cnt < GLRO(dl_naudit); ++cnt) + { + /* XXX Check whether both DSOs must request action or only one */ +@@ -215,37 +223,41 @@ _dl_audit_symbind (struct link_map *l, struct reloc_result *reloc_result, + { + if (afct->symbind != NULL) + { +- uintptr_t new_value = afct->symbind (&sym, +- reloc_result->boundndx, +- &l_state->cookie, +- &result_state->cookie, +- &flags, +- strtab2 + defsym->st_name); ++ flags |= for_jmp_slot ? LA_SYMB_NOPLTENTER | LA_SYMB_NOPLTEXIT ++ : 0; ++ new_value = afct->symbind (&sym, boundndx, ++ &l_state->cookie, ++ &result_state->cookie, &flags, ++ strtab2 + defsym->st_name); + if (new_value != (uintptr_t) sym.st_value) + { + flags |= LA_SYMB_ALTVALUE; +- sym.st_value = new_value; ++ sym.st_value = for_jmp_slot ++ ? DL_FIXUP_BINDNOW_ADDR_VALUE (new_value) : new_value; + } + } + + /* Remember the results for every audit library and store a summary + in the first two bits. */ +- reloc_result->enterexit &= flags & (LA_SYMB_NOPLTENTER +- | LA_SYMB_NOPLTEXIT); +- reloc_result->enterexit |= ((flags & (LA_SYMB_NOPLTENTER +- | LA_SYMB_NOPLTEXIT)) +- << ((cnt + 1) * 2)); ++ enterexit &= flags & (LA_SYMB_NOPLTENTER | LA_SYMB_NOPLTEXIT); ++ enterexit |= ((flags & (LA_SYMB_NOPLTENTER | LA_SYMB_NOPLTEXIT)) ++ << ((cnt + 1) * 2)); + } + else + /* If the bind flags say this auditor is not interested, set the bits + manually. */ +- reloc_result->enterexit |= ((LA_SYMB_NOPLTENTER | LA_SYMB_NOPLTEXIT) +- << ((cnt + 1) * 2)); ++ enterexit |= ((LA_SYMB_NOPLTENTER | LA_SYMB_NOPLTEXIT) ++ << ((cnt + 1) * 2)); + afct = afct->next; + } + +- reloc_result->flags = flags; +- *value = DL_FIXUP_ADDR_VALUE (sym.st_value); ++ if (!for_jmp_slot) ++ { ++ reloc_result->enterexit = enterexit; ++ reloc_result->flags = flags; ++ } ++ ++ DL_FIXUP_BINDNOW_RELOC (value, new_value, sym.st_value); + } + + void +diff --git a/elf/do-rel.h b/elf/do-rel.h +index f441b749190c2641..4b7fc14f74cb5d09 100644 +--- a/elf/do-rel.h ++++ b/elf/do-rel.h +@@ -16,6 +16,8 @@ + License along with the GNU C Library; if not, see + . */ + ++#include ++ + /* This file may be included twice, to define both + `elf_dynamic_do_rel' and `elf_dynamic_do_rela'. 
*/
+ 
+@@ -123,6 +125,10 @@ elf_dynamic_do_Rel (struct link_map *map, struct r_scope_elem *scope[],
+ 
+       for (; r < end; ++r)
+ 	{
++	  ElfW(Half) ndx = version[ELFW(R_SYM) (r->r_info)] & 0x7fff;
++	  const ElfW(Sym) *sym = &symtab[ELFW(R_SYM) (r->r_info)];
++	  void *const r_addr_arg = (void *) (l_addr + r->r_offset);
++	  const struct r_found_version *rversion = &map->l_versions[ndx];
+ #if defined ELF_MACHINE_IRELATIVE && !defined RTLD_BOOTSTRAP
+ 	  if (ELFW(R_TYPE) (r->r_info) == ELF_MACHINE_IRELATIVE)
+ 	    {
+@@ -133,10 +139,19 @@ elf_dynamic_do_Rel (struct link_map *map, struct r_scope_elem *scope[],
+ 	    }
+ #endif
+ 
+-	  ElfW(Half) ndx = version[ELFW(R_SYM) (r->r_info)] & 0x7fff;
+-	  elf_machine_rel (map, scope, r, &symtab[ELFW(R_SYM) (r->r_info)],
+-			   &map->l_versions[ndx],
+-			   (void *) (l_addr + r->r_offset), skip_ifunc);
++	  elf_machine_rel (map, scope, r, sym, rversion, r_addr_arg,
++			   skip_ifunc);
++#if defined SHARED && !defined RTLD_BOOTSTRAP
++	  if (ELFW(R_TYPE) (r->r_info) == ELF_MACHINE_JMP_SLOT
++	      && GLRO(dl_naudit) > 0)
++	    {
++	      struct link_map *sym_map
++		= RESOLVE_MAP (map, scope, &sym, rversion,
++			       ELF_MACHINE_JMP_SLOT);
++	      if (sym != NULL)
++		_dl_audit_symbind (map, NULL, sym, r_addr_arg, sym_map);
++	    }
++#endif
+ 	}
+ 
+ #if defined ELF_MACHINE_IRELATIVE && !defined RTLD_BOOTSTRAP
+@@ -158,17 +173,33 @@ elf_dynamic_do_Rel (struct link_map *map, struct r_scope_elem *scope[],
+   else
+     {
+       for (; r < end; ++r)
++	{
++	  const ElfW(Sym) *sym = &symtab[ELFW(R_SYM) (r->r_info)];
++	  void *const r_addr_arg = (void *) (l_addr + r->r_offset);
+ # ifdef ELF_MACHINE_IRELATIVE
+-	if (ELFW(R_TYPE) (r->r_info) == ELF_MACHINE_IRELATIVE)
+-	  {
+-	    if (r2 == NULL)
+-	      r2 = r;
+-	    end2 = r;
+-	  }
+-	else
++	  if (ELFW(R_TYPE) (r->r_info) == ELF_MACHINE_IRELATIVE)
++	    {
++	      if (r2 == NULL)
++		r2 = r;
++	      end2 = r;
++	      continue;
++	    }
+ # endif
+-	  elf_machine_rel (map, scope, r, &symtab[ELFW(R_SYM) (r->r_info)], NULL,
+-			   (void *) (l_addr + r->r_offset), skip_ifunc);
++	  elf_machine_rel (map, scope, r, sym, NULL, r_addr_arg,
++			   skip_ifunc);
++# if defined SHARED && !defined RTLD_BOOTSTRAP
++	  if (ELFW(R_TYPE) (r->r_info) == ELF_MACHINE_JMP_SLOT
++	      && GLRO(dl_naudit) > 0)
++	    {
++	      struct link_map *sym_map
++		= RESOLVE_MAP (map, scope, &sym,
++			       (struct r_found_version *) NULL,
++			       ELF_MACHINE_JMP_SLOT);
++	      if (sym != NULL)
++		_dl_audit_symbind (map, NULL, sym, r_addr_arg, sym_map);
++	    }
++# endif
++	}
+ 
+ # ifdef ELF_MACHINE_IRELATIVE
+       if (r2 != NULL)
+diff --git a/elf/sotruss-lib.c b/elf/sotruss-lib.c
+index b711f7b0c892a972..e4ebc8dbc697df3f 100644
+--- a/elf/sotruss-lib.c
++++ b/elf/sotruss-lib.c
+@@ -17,6 +17,7 @@
+    License along with the GNU C Library; if not, see
+    <https://www.gnu.org/licenses/>.  */
+ 
++#include <err.h>
+ #include <error.h>
+ #include <fcntl.h>
+ #include <stdio.h>
+@@ -232,6 +233,12 @@ uintptr_t
+ la_symbind (Elf_Sym *sym, unsigned int ndx, uintptr_t *refcook,
+ 	    uintptr_t *defcook, unsigned int *flags, const char *symname)
+ {
++  if (*flags & LA_SYMB_NOPLTENTER)
++    warnx ("cannot trace PLT enter (bind-now enabled)");
++
++  if (do_exit && *flags & LA_SYMB_NOPLTEXIT)
++    warnx ("cannot trace PLT exit (bind-now enabled)");
++
+   if (!do_exit)
+     *flags = LA_SYMB_NOPLTEXIT;
+ 
+diff --git a/elf/tst-audit24a.c b/elf/tst-audit24a.c
+new file mode 100644
+index 0000000000000000..a1781c9b45f18fa0
+--- /dev/null
++++ b/elf/tst-audit24a.c
+@@ -0,0 +1,36 @@
++/* LD_AUDIT test for la_symbind and bind-now.
++   Copyright (C) 2022 Free Software Foundation, Inc.
++   This file is part of the GNU C Library.
++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++#include ++ ++int tst_audit24amod1_func1 (void); ++int tst_audit24amod1_func2 (void); ++int tst_audit24amod2_func1 (void); ++ ++int ++do_test (void) ++{ ++ TEST_COMPARE (tst_audit24amod1_func1 (), 1); ++ TEST_COMPARE (tst_audit24amod1_func2 (), 2); ++ TEST_COMPARE (tst_audit24amod2_func1 (), 10); ++ ++ return 0; ++} ++ ++#include +diff --git a/elf/tst-audit24amod1.c b/elf/tst-audit24amod1.c +new file mode 100644 +index 0000000000000000..0289a4abefbc7bbb +--- /dev/null ++++ b/elf/tst-audit24amod1.c +@@ -0,0 +1,31 @@ ++/* Module used by tst-audit24a. ++ Copyright (C) 2022 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++ ++_Noreturn int ++tst_audit24amod1_func1 (void) ++{ ++ abort (); ++} ++ ++int ++tst_audit24amod1_func2 (void) ++{ ++ return 2; ++} +diff --git a/elf/tst-audit24amod2.c b/elf/tst-audit24amod2.c +new file mode 100644 +index 0000000000000000..1562afc9dfc1b9b3 +--- /dev/null ++++ b/elf/tst-audit24amod2.c +@@ -0,0 +1,25 @@ ++/* Module used by tst-audit24a. ++ Copyright (C) 2022 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++ ++_Noreturn int ++tst_audit24amod2_func1 (void) ++{ ++ abort (); ++} +diff --git a/elf/tst-audit24b.c b/elf/tst-audit24b.c +new file mode 100644 +index 0000000000000000..567bee52c27f4361 +--- /dev/null ++++ b/elf/tst-audit24b.c +@@ -0,0 +1,37 @@ ++/* LD_AUDIT test for la_symbind and bind-now. ++ Copyright (C) 2022 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. 
++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++/* This is similar to tst-audit24a, with the difference this modules ++ does not have the .gnu.version section header. */ ++ ++#include ++#include ++ ++int tst_audit24bmod1_func1 (void); ++int tst_audit24bmod1_func2 (void); ++ ++int ++do_test (void) ++{ ++ TEST_COMPARE (tst_audit24bmod1_func1 (), 1); ++ TEST_COMPARE (tst_audit24bmod1_func2 (), 2); ++ ++ return 0; ++} ++ ++#include +diff --git a/elf/tst-audit24bmod1.c b/elf/tst-audit24bmod1.c +new file mode 100644 +index 0000000000000000..57ce14a01bf72fb6 +--- /dev/null ++++ b/elf/tst-audit24bmod1.c +@@ -0,0 +1,31 @@ ++/* Module used by tst-audit24c. ++ Copyright (C) 2022 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++int tst_audit24bmod2_func1 (void); ++ ++int ++tst_audit24bmod1_func1 (void) ++{ ++ return -1; ++} ++ ++int ++tst_audit24bmod1_func2 (void) ++{ ++ return tst_audit24bmod2_func1 (); ++} +diff --git a/elf/tst-audit24bmod2.c b/elf/tst-audit24bmod2.c +new file mode 100644 +index 0000000000000000..b298ce0a05bf2db2 +--- /dev/null ++++ b/elf/tst-audit24bmod2.c +@@ -0,0 +1,23 @@ ++/* Module used by tst-audit24b. ++ Copyright (C) 2022 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . 
*/
++
++int
++tst_audit24bmod2_func1 (void)
++{
++  return -1;
++}
+diff --git a/elf/tst-audit24c.c b/elf/tst-audit24c.c
+new file mode 100644
+index 0000000000000000..46ed328756067276
+--- /dev/null
++++ b/elf/tst-audit24c.c
+@@ -0,0 +1,2 @@
++/* It tests LD_BIND_NOW=1 instead of linking with -Wl,-z,now.  */
++#include "tst-audit24a.c"
+diff --git a/elf/tst-audit24d.c b/elf/tst-audit24d.c
+new file mode 100644
+index 0000000000000000..543f3b86a6bbdead
+--- /dev/null
++++ b/elf/tst-audit24d.c
+@@ -0,0 +1,36 @@
++/* LD_AUDIT test for la_symbind and bind-now.
++   Copyright (C) 2022 Free Software Foundation, Inc.
++   This file is part of the GNU C Library.
++
++   The GNU C Library is free software; you can redistribute it and/or
++   modify it under the terms of the GNU Lesser General Public
++   License as published by the Free Software Foundation; either
++   version 2.1 of the License, or (at your option) any later version.
++
++   The GNU C Library is distributed in the hope that it will be useful,
++   but WITHOUT ANY WARRANTY; without even the implied warranty of
++   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++   Lesser General Public License for more details.
++
++   You should have received a copy of the GNU Lesser General Public
++   License along with the GNU C Library; if not, see
++   <https://www.gnu.org/licenses/>.  */
++
++#include <support/check.h>
++#include <support/support.h>
++
++int tst_audit24dmod1_func1 (void);
++int tst_audit24dmod1_func2 (void);
++int tst_audit24dmod2_func1 (void);
++
++int
++do_test (void)
++{
++  TEST_COMPARE (tst_audit24dmod1_func1 (), 1);
++  TEST_COMPARE (tst_audit24dmod1_func2 (), 32);
++  TEST_COMPARE (tst_audit24dmod2_func1 (), 10);
++
++  return 0;
++}
++
++#include <support/test-driver.c>
+diff --git a/elf/tst-audit24dmod1.c b/elf/tst-audit24dmod1.c
+new file mode 100644
+index 0000000000000000..e563f69d638ac3f5
+--- /dev/null
++++ b/elf/tst-audit24dmod1.c
+@@ -0,0 +1,33 @@
++/* Module used by tst-audit24d.
++   Copyright (C) 2022 Free Software Foundation, Inc.
++   This file is part of the GNU C Library.
++
++   The GNU C Library is free software; you can redistribute it and/or
++   modify it under the terms of the GNU Lesser General Public
++   License as published by the Free Software Foundation; either
++   version 2.1 of the License, or (at your option) any later version.
++
++   The GNU C Library is distributed in the hope that it will be useful,
++   but WITHOUT ANY WARRANTY; without even the implied warranty of
++   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++   Lesser General Public License for more details.
++
++   You should have received a copy of the GNU Lesser General Public
++   License along with the GNU C Library; if not, see
++   <https://www.gnu.org/licenses/>.  */
++
++#include <stdlib.h>
++
++int tst_audit24dmod3_func1 (void);
++
++_Noreturn int
++tst_audit24dmod1_func1 (void)
++{
++  abort ();
++}
++
++int
++tst_audit24dmod1_func2 (void)
++{
++  return 2 + tst_audit24dmod3_func1 ();
++}
+diff --git a/elf/tst-audit24dmod2.c b/elf/tst-audit24dmod2.c
+new file mode 100644
+index 0000000000000000..03fe9381281e5790
+--- /dev/null
++++ b/elf/tst-audit24dmod2.c
+@@ -0,0 +1,28 @@
++/* Module for tst-audit24d.
++   Copyright (C) 2022 Free Software Foundation, Inc.
++   This file is part of the GNU C Library.
++
++   The GNU C Library is free software; you can redistribute it and/or
++   modify it under the terms of the GNU Lesser General Public
++   License as published by the Free Software Foundation; either
++   version 2.1 of the License, or (at your option) any later version.
++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++ ++int tst_audit24dmod4_func1 (void); ++ ++_Noreturn int ++tst_audit24dmod2_func1 (void) ++{ ++ tst_audit24dmod4_func1 (); ++ abort (); ++} +diff --git a/elf/tst-audit24dmod3.c b/elf/tst-audit24dmod3.c +new file mode 100644 +index 0000000000000000..106d517d2887d76c +--- /dev/null ++++ b/elf/tst-audit24dmod3.c +@@ -0,0 +1,31 @@ ++/* Module for tst-audit24d. ++ Copyright (C) 2022 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++ ++_Noreturn int ++tst_audit24dmod3_func1 (void) ++{ ++ abort (); ++} ++ ++int ++tst_audit24dmod3_func2 (void) ++{ ++ return 4; ++} +diff --git a/elf/tst-audit24dmod4.c b/elf/tst-audit24dmod4.c +new file mode 100644 +index 0000000000000000..1da3b46917ba1083 +--- /dev/null ++++ b/elf/tst-audit24dmod4.c +@@ -0,0 +1,25 @@ ++/* Module for tst-audit24d. ++ Copyright (C) 2022 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++ ++_Noreturn int ++tst_audit24dmod4_func1 (void) ++{ ++ abort (); ++} +diff --git a/elf/tst-audit25a.c b/elf/tst-audit25a.c +new file mode 100644 +index 0000000000000000..49173e862516e876 +--- /dev/null ++++ b/elf/tst-audit25a.c +@@ -0,0 +1,129 @@ ++/* Check LD_AUDIT and LD_BIND_NOW. ++ Copyright (C) 2022 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++static int restart; ++#define CMDLINE_OPTIONS \ ++ { "restart", no_argument, &restart, 1 }, ++ ++void tst_audit25mod1_func1 (void); ++void tst_audit25mod1_func2 (void); ++void tst_audit25mod2_func1 (void); ++void tst_audit25mod2_func2 (void); ++ ++static int ++handle_restart (void) ++{ ++ tst_audit25mod1_func1 (); ++ tst_audit25mod1_func2 (); ++ tst_audit25mod2_func1 (); ++ tst_audit25mod2_func2 (); ++ ++ return 0; ++} ++ ++static inline bool ++startswith (const char *str, const char *pre) ++{ ++ size_t lenpre = strlen (pre); ++ size_t lenstr = strlen (str); ++ return lenstr < lenpre ? false : memcmp (pre, str, lenpre) == 0; ++} ++ ++static int ++do_test (int argc, char *argv[]) ++{ ++ /* We must have either: ++ - One or four parameters left if called initially: ++ + path to ld.so optional ++ + "--library-path" optional ++ + the library path optional ++ + the application name */ ++ ++ if (restart) ++ return handle_restart (); ++ ++ setenv ("LD_AUDIT", "tst-auditmod25.so", 0); ++ ++ char *spargv[9]; ++ int i = 0; ++ for (; i < argc - 1; i++) ++ spargv[i] = argv[i + 1]; ++ spargv[i++] = (char *) "--direct"; ++ spargv[i++] = (char *) "--restart"; ++ spargv[i] = NULL; ++ TEST_VERIFY_EXIT (i < array_length (spargv)); ++ ++ { ++ struct support_capture_subprocess result ++ = support_capture_subprogram (spargv[0], spargv); ++ support_capture_subprocess_check (&result, "tst-audit25a", 0, ++ sc_allow_stderr); ++ ++ /* tst-audit25a is build with -Wl,-z,lazy and tst-audit25mod1 with ++ -Wl,-z,now; so only tst_audit25mod3_func1 should be expected to ++ have LA_SYMB_NOPLTENTER | LA_SYMB_NOPLTEXIT. */ ++ TEST_COMPARE_STRING (result.err.buffer, ++ "la_symbind: tst_audit25mod3_func1 1\n" ++ "la_symbind: tst_audit25mod1_func1 0\n" ++ "la_symbind: tst_audit25mod1_func2 0\n" ++ "la_symbind: tst_audit25mod2_func1 0\n" ++ "la_symbind: tst_audit25mod4_func1 0\n" ++ "la_symbind: tst_audit25mod2_func2 0\n"); ++ ++ support_capture_subprocess_free (&result); ++ } ++ ++ { ++ setenv ("LD_BIND_NOW", "1", 0); ++ struct support_capture_subprocess result ++ = support_capture_subprogram (spargv[0], spargv); ++ support_capture_subprocess_check (&result, "tst-audit25a", 0, ++ sc_allow_stderr); ++ ++ /* With LD_BIND_NOW all symbols are expected to have ++ LA_SYMB_NOPLTENTER | LA_SYMB_NOPLTEXIT. Also the resolution ++ order is done in breadth-first order. */ ++ TEST_COMPARE_STRING (result.err.buffer, ++ "la_symbind: tst_audit25mod4_func1 1\n" ++ "la_symbind: tst_audit25mod3_func1 1\n" ++ "la_symbind: tst_audit25mod1_func1 1\n" ++ "la_symbind: tst_audit25mod2_func1 1\n" ++ "la_symbind: tst_audit25mod1_func2 1\n" ++ "la_symbind: tst_audit25mod2_func2 1\n"); ++ ++ support_capture_subprocess_free (&result); ++ } ++ ++ return 0; ++} ++ ++#define TEST_FUNCTION_ARGV do_test ++#include +diff --git a/elf/tst-audit25b.c b/elf/tst-audit25b.c +new file mode 100644 +index 0000000000000000..a56638d501f9bff5 +--- /dev/null ++++ b/elf/tst-audit25b.c +@@ -0,0 +1,128 @@ ++/* Check LD_AUDIT and LD_BIND_NOW. ++ Copyright (C) 2022 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. 
++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++static int restart; ++#define CMDLINE_OPTIONS \ ++ { "restart", no_argument, &restart, 1 }, ++ ++void tst_audit25mod1_func1 (void); ++void tst_audit25mod1_func2 (void); ++void tst_audit25mod2_func1 (void); ++void tst_audit25mod2_func2 (void); ++ ++static int ++handle_restart (void) ++{ ++ tst_audit25mod1_func1 (); ++ tst_audit25mod1_func2 (); ++ tst_audit25mod2_func1 (); ++ tst_audit25mod2_func2 (); ++ ++ return 0; ++} ++ ++static inline bool ++startswith (const char *str, const char *pre) ++{ ++ size_t lenpre = strlen (pre); ++ size_t lenstr = strlen (str); ++ return lenstr >= lenpre && memcmp (pre, str, lenpre) == 0; ++} ++ ++static int ++do_test (int argc, char *argv[]) ++{ ++ /* We must have either: ++ - One or four parameters left if called initially: ++ + path to ld.so optional ++ + "--library-path" optional ++ + the library path optional ++ + the application name */ ++ ++ if (restart) ++ return handle_restart (); ++ ++ setenv ("LD_AUDIT", "tst-auditmod25.so", 0); ++ ++ char *spargv[9]; ++ int i = 0; ++ for (; i < argc - 1; i++) ++ spargv[i] = argv[i + 1]; ++ spargv[i++] = (char *) "--direct"; ++ spargv[i++] = (char *) "--restart"; ++ spargv[i] = NULL; ++ ++ { ++ struct support_capture_subprocess result ++ = support_capture_subprogram (spargv[0], spargv); ++ support_capture_subprocess_check (&result, "tst-audit25a", 0, ++ sc_allow_stderr); ++ ++ /* tst-audit25a and tst-audit25mod1 are built with -Wl,-z,now, but ++ tst-audit25mod2 is built with -Wl,-z,lazy. So only ++ tst_audit25mod4_func1 (called by tst_audit25mod2_func1) should not ++ have LA_SYMB_NOPLTENTER | LA_SYMB_NOPLTEXIT. */ ++ TEST_COMPARE_STRING (result.err.buffer, ++ "la_symbind: tst_audit25mod3_func1 1\n" ++ "la_symbind: tst_audit25mod1_func1 1\n" ++ "la_symbind: tst_audit25mod2_func1 1\n" ++ "la_symbind: tst_audit25mod1_func2 1\n" ++ "la_symbind: tst_audit25mod2_func2 1\n" ++ "la_symbind: tst_audit25mod4_func1 0\n"); ++ ++ support_capture_subprocess_free (&result); ++ } ++ ++ { ++ setenv ("LD_BIND_NOW", "1", 0); ++ struct support_capture_subprocess result ++ = support_capture_subprogram (spargv[0], spargv); ++ support_capture_subprocess_check (&result, "tst-audit25a", 0, ++ sc_allow_stderr); ++ ++ /* With LD_BIND_NOW all symbols are expected to have ++ LA_SYMB_NOPLTENTER | LA_SYMB_NOPLTEXIT. Also the resolution ++ order is done in breadth-first order. 
*/ ++ TEST_COMPARE_STRING (result.err.buffer, ++ "la_symbind: tst_audit25mod4_func1 1\n" ++ "la_symbind: tst_audit25mod3_func1 1\n" ++ "la_symbind: tst_audit25mod1_func1 1\n" ++ "la_symbind: tst_audit25mod2_func1 1\n" ++ "la_symbind: tst_audit25mod1_func2 1\n" ++ "la_symbind: tst_audit25mod2_func2 1\n"); ++ ++ support_capture_subprocess_free (&result); ++ } ++ ++ return 0; ++} ++ ++#define TEST_FUNCTION_ARGV do_test ++#include +diff --git a/elf/tst-audit25mod1.c b/elf/tst-audit25mod1.c +new file mode 100644 +index 0000000000000000..a132e34a9b2cf51f +--- /dev/null ++++ b/elf/tst-audit25mod1.c +@@ -0,0 +1,30 @@ ++/* Module used by tst-audit25. ++ Copyright (C) 2022 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++void tst_audit25mod3_func1 (void); ++ ++void ++tst_audit25mod1_func1 (void) ++{ ++ tst_audit25mod3_func1 (); ++} ++ ++void ++tst_audit25mod1_func2 (void) ++{ ++} +diff --git a/elf/tst-audit25mod2.c b/elf/tst-audit25mod2.c +new file mode 100644 +index 0000000000000000..92da26fa80b202c2 +--- /dev/null ++++ b/elf/tst-audit25mod2.c +@@ -0,0 +1,30 @@ ++/* Module used by tst-audit25. ++ Copyright (C) 2022 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++void tst_audit25mod4_func1 (void); ++ ++void ++tst_audit25mod2_func1 (void) ++{ ++ tst_audit25mod4_func1 (); ++} ++ ++void ++tst_audit25mod2_func2 (void) ++{ ++} +diff --git a/elf/tst-audit25mod3.c b/elf/tst-audit25mod3.c +new file mode 100644 +index 0000000000000000..af83e8919083adef +--- /dev/null ++++ b/elf/tst-audit25mod3.c +@@ -0,0 +1,22 @@ ++/* Module used by tst-audit25. ++ Copyright (C) 2022 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++void ++tst_audit25mod3_func1 (void) ++{ ++} +diff --git a/elf/tst-audit25mod4.c b/elf/tst-audit25mod4.c +new file mode 100644 +index 0000000000000000..6cdf34357582da16 +--- /dev/null ++++ b/elf/tst-audit25mod4.c +@@ -0,0 +1,22 @@ ++/* Module used by tst-audit25. ++ Copyright (C) 2022 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++void ++tst_audit25mod4_func1 (void) ++{ ++} +diff --git a/elf/tst-auditmod24.h b/elf/tst-auditmod24.h +new file mode 100644 +index 0000000000000000..5fdbfef12dac2b2a +--- /dev/null ++++ b/elf/tst-auditmod24.h +@@ -0,0 +1,29 @@ ++/* Auxiliary functions for tst-audit24x. ++ Copyright (C) 2022 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#ifndef _TST_AUDITMOD24_H ++#define _TST_AUDITMOD24_H ++ ++static void ++test_symbind_flags (unsigned int flags) ++{ ++ if ((flags & (LA_SYMB_NOPLTENTER | LA_SYMB_NOPLTEXIT)) == 0) ++ abort (); ++} ++ ++#endif +diff --git a/elf/tst-auditmod24a.c b/elf/tst-auditmod24a.c +new file mode 100644 +index 0000000000000000..d8e88f3984af1707 +--- /dev/null ++++ b/elf/tst-auditmod24a.c +@@ -0,0 +1,114 @@ ++/* Audit modules for tst-audit24a. ++ Copyright (C) 2022 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . 
*/ ++ ++#include ++#include ++#include ++#include ++#include ++ ++#define AUDIT24_COOKIE 0x1 ++#define AUDIT24MOD1_COOKIE 0x2 ++#define AUDIT24MOD2_COOKIE 0x3 ++ ++#ifndef TEST_NAME ++# define TEST_NAME "tst-audit24a" ++#endif ++#ifndef TEST_MOD ++# define TEST_MOD TEST_NAME ++#endif ++#ifndef TEST_FUNC ++# define TEST_FUNC "tst_audit24a" ++#endif ++ ++unsigned int ++la_version (unsigned int version) ++{ ++ return LAV_CURRENT; ++} ++ ++unsigned int ++la_objopen (struct link_map *map, Lmid_t lmid, uintptr_t *cookie) ++{ ++ const char *p = strrchr (map->l_name, '/'); ++ const char *l_name = p == NULL ? TEST_NAME : p + 1; ++ ++ uintptr_t ck = -1; ++ if (strcmp (l_name, TEST_MOD "mod1.so") == 0) ++ ck = AUDIT24MOD1_COOKIE; ++ else if (strcmp (l_name, TEST_MOD "mod2.so") == 0) ++ ck = AUDIT24MOD2_COOKIE; ++ else if (strcmp (l_name, TEST_NAME) == 0) ++ ck = AUDIT24_COOKIE; ++ ++ *cookie = ck; ++ return ck == -1 ? 0 : LA_FLG_BINDFROM | LA_FLG_BINDTO; ++} ++ ++static int ++tst_func1 (void) ++{ ++ return 1; ++} ++ ++static int ++tst_func2 (void) ++{ ++ return 10; ++} ++ ++#if __ELF_NATIVE_CLASS == 64 ++uintptr_t ++la_symbind64 (Elf64_Sym *sym, unsigned int ndx, ++ uintptr_t *refcook, uintptr_t *defcook, ++ unsigned int *flags, const char *symname) ++#else ++uintptr_t ++la_symbind32 (Elf32_Sym *sym, unsigned int ndx, ++ uintptr_t *refcook, uintptr_t *defcook, ++ unsigned int *flags, const char *symname) ++#endif ++{ ++ if (*refcook == AUDIT24_COOKIE) ++ { ++ if (*defcook == AUDIT24MOD1_COOKIE) ++ { ++ /* Check if bind-now symbols are advertised to not call the PLT ++ hooks. */ ++ test_symbind_flags (*flags); ++ ++ if (strcmp (symname, TEST_FUNC "mod1_func1") == 0) ++ return (uintptr_t) tst_func1; ++ else if (strcmp (symname, TEST_FUNC "mod1_func2") == 0) ++ return sym->st_value; ++ abort (); ++ } ++ if (*defcook == AUDIT24MOD2_COOKIE ++ && (strcmp (symname, TEST_FUNC "mod2_func1") == 0)) ++ { ++ test_symbind_flags (*flags); ++ ++ return (uintptr_t) tst_func2; ++ } ++ ++ /* malloc functions. */ ++ return sym->st_value; ++ } ++ ++ abort (); ++} +diff --git a/elf/tst-auditmod24b.c b/elf/tst-auditmod24b.c +new file mode 100644 +index 0000000000000000..e98f6d5ec528fe03 +--- /dev/null ++++ b/elf/tst-auditmod24b.c +@@ -0,0 +1,104 @@ ++/* Audit modules for tst-audit24b. ++ Copyright (C) 2022 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++#include ++#include ++#include ++#include ++ ++#define TEST_NAME "tst-audit24b" ++#define TEST_FUNC "tst_audit24b" ++ ++#define AUDIT24_COOKIE 0x1 ++#define AUDIT24MOD1_COOKIE 0x2 ++#define AUDIT24MOD2_COOKIE 0x3 ++ ++unsigned int ++la_version (unsigned int version) ++{ ++ return LAV_CURRENT; ++} ++ ++unsigned int ++la_objopen (struct link_map *map, Lmid_t lmid, uintptr_t *cookie) ++{ ++ const char *p = strrchr (map->l_name, '/'); ++ const char *l_name = p == NULL ? 
TEST_NAME : p + 1; ++ ++ uintptr_t ck = -1; ++ if (strcmp (l_name, TEST_NAME "mod1.so") == 0) ++ ck = AUDIT24MOD1_COOKIE; ++ else if (strcmp (l_name, TEST_NAME "mod2.so") == 0) ++ ck = AUDIT24MOD2_COOKIE; ++ else if (strcmp (l_name, TEST_NAME) == 0) ++ ck = AUDIT24_COOKIE; ++ ++ *cookie = ck; ++ return ck == -1 ? 0 : LA_FLG_BINDFROM | LA_FLG_BINDTO; ++} ++ ++static int ++tst_func1 (void) ++{ ++ return 1; ++} ++ ++static int ++tst_func2 (void) ++{ ++ return 2; ++} ++ ++#if __ELF_NATIVE_CLASS == 64 ++uintptr_t ++la_symbind64 (Elf64_Sym *sym, unsigned int ndx, ++ uintptr_t *refcook, uintptr_t *defcook, ++ unsigned int *flags, const char *symname) ++#else ++uintptr_t ++la_symbind32 (Elf32_Sym *sym, unsigned int ndx, ++ uintptr_t *refcook, uintptr_t *defcook, ++ unsigned int *flags, const char *symname) ++#endif ++{ ++ if (*refcook == AUDIT24_COOKIE) ++ { ++ if (*defcook == AUDIT24MOD1_COOKIE) ++ { ++ if (strcmp (symname, TEST_FUNC "mod1_func1") == 0) ++ return (uintptr_t) tst_func1; ++ else if (strcmp (symname, TEST_FUNC "mod1_func2") == 0) ++ return sym->st_value; ++ abort (); ++ } ++ /* malloc functions. */ ++ return sym->st_value; ++ } ++ else if (*refcook == AUDIT24MOD1_COOKIE) ++ { ++ if (*defcook == AUDIT24MOD2_COOKIE ++ && (strcmp (symname, TEST_FUNC "mod2_func1") == 0)) ++ { ++ test_symbind_flags (*flags); ++ return (uintptr_t) tst_func2; ++ } ++ } ++ ++ abort (); ++} +diff --git a/elf/tst-auditmod24c.c b/elf/tst-auditmod24c.c +new file mode 100644 +index 0000000000000000..67e62c9d332f48a7 +--- /dev/null ++++ b/elf/tst-auditmod24c.c +@@ -0,0 +1,3 @@ ++#define TEST_NAME "tst-audit24c" ++#define TEST_MOD "tst-audit24a" ++#include "tst-auditmod24a.c" +diff --git a/elf/tst-auditmod24d.c b/elf/tst-auditmod24d.c +new file mode 100644 +index 0000000000000000..8c803ecc0a48f21b +--- /dev/null ++++ b/elf/tst-auditmod24d.c +@@ -0,0 +1,120 @@ ++/* Audit module for tst-audit24d. ++ Copyright (C) 2022 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++#include ++#include ++#include ++#include ++ ++#define AUDIT24_COOKIE 0x0 ++#define AUDIT24MOD1_COOKIE 0x1 ++#define AUDIT24MOD2_COOKIE 0x2 ++#define AUDIT24MOD3_COOKIE 0x3 ++#define AUDIT24MOD4_COOKIE 0x4 ++ ++unsigned int ++la_version (unsigned int version) ++{ ++ return LAV_CURRENT; ++} ++ ++unsigned int ++la_objopen (struct link_map *map, Lmid_t lmid, uintptr_t *cookie) ++{ ++ const char *p = strrchr (map->l_name, '/'); ++ const char *l_name = p == NULL ? 
"tst-audit24d" : p + 1; ++ ++ uintptr_t ck = -1; ++ if (strcmp (l_name, "tst-audit24dmod1.so") == 0) ++ ck = AUDIT24MOD1_COOKIE; ++ else if (strcmp (l_name, "tst-audit24dmod2.so") == 0) ++ ck = AUDIT24MOD2_COOKIE; ++ else if (strcmp (l_name, "tst-audit24dmod3.so") == 0) ++ ck = AUDIT24MOD3_COOKIE; ++ else if (strcmp (l_name, "tst-audit24dmod.so") == 0) ++ ck = AUDIT24MOD4_COOKIE; ++ else if (strcmp (l_name, "tst-audit24d") == 0) ++ ck = AUDIT24_COOKIE; ++ ++ *cookie = ck; ++ return ck == -1 ? 0 : LA_FLG_BINDFROM | LA_FLG_BINDTO; ++} ++ ++static int ++tst_audit24dmod1_func1 (void) ++{ ++ return 1; ++} ++ ++static int ++tst_audit24dmod2_func1 (void) ++{ ++ return 10; ++} ++ ++static int ++tst_audit24dmod3_func1 (void) ++{ ++ return 30; ++} ++ ++#include ++ ++#if __ELF_NATIVE_CLASS == 64 ++uintptr_t ++la_symbind64 (Elf64_Sym *sym, unsigned int ndx, ++ uintptr_t *refcook, uintptr_t *defcook, ++ unsigned int *flags, const char *symname) ++#else ++uintptr_t ++la_symbind32 (Elf32_Sym *sym, unsigned int ndx, ++ uintptr_t *refcook, uintptr_t *defcook, ++ unsigned int *flags, const char *symname) ++#endif ++{ ++ if (*refcook == AUDIT24_COOKIE) ++ { ++ if (*defcook == AUDIT24MOD1_COOKIE) ++ { ++ if (strcmp (symname, "tst_audit24dmod1_func1") == 0) ++ return (uintptr_t) tst_audit24dmod1_func1; ++ else if (strcmp (symname, "tst_audit24dmod1_func2") == 0) ++ return sym->st_value; ++ abort (); ++ } ++ if (*defcook == AUDIT24MOD2_COOKIE ++ && (strcmp (symname, "tst_audit24dmod2_func1") == 0)) ++ return (uintptr_t) tst_audit24dmod2_func1; ++ ++ /* malloc functions. */ ++ return sym->st_value; ++ } ++ else if (*refcook == AUDIT24MOD1_COOKIE) ++ { ++ if (*defcook == AUDIT24MOD3_COOKIE ++ && strcmp (symname, "tst_audit24dmod3_func1") == 0) ++ { ++ test_symbind_flags (*flags); ++ ++ return (uintptr_t) tst_audit24dmod3_func1; ++ } ++ } ++ ++ abort (); ++} +diff --git a/elf/tst-auditmod25.c b/elf/tst-auditmod25.c +new file mode 100644 +index 0000000000000000..526f5c54bc2c3b8c +--- /dev/null ++++ b/elf/tst-auditmod25.c +@@ -0,0 +1,79 @@ ++/* Audit modules for tst-audit25a. ++ Copyright (C) 2022 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++#include ++#include ++#include ++#include ++ ++#define AUDIT25_COOKIE 0x1 ++#define AUDIT25MOD1_COOKIE 0x2 ++#define AUDIT25MOD2_COOKIE 0x3 ++#define AUDIT25MOD3_COOKIE 0x2 ++#define AUDIT25MOD4_COOKIE 0x3 ++ ++#define TEST_NAME "tst-audit25" ++#define TEST_MOD "tst-audit25" ++#define TEST_FUNC "tst_audit25" ++ ++unsigned int ++la_version (unsigned int version) ++{ ++ return LAV_CURRENT; ++} ++ ++unsigned int ++la_objopen (struct link_map *map, Lmid_t lmid, uintptr_t *cookie) ++{ ++ const char *p = strrchr (map->l_name, '/'); ++ const char *l_name = p == NULL ? 
TEST_NAME : p + 1;
++
++  uintptr_t ck = -1;
++  if (strcmp (l_name, TEST_MOD "mod1.so") == 0)
++    ck = AUDIT25MOD1_COOKIE;
++  else if (strcmp (l_name, TEST_MOD "mod2.so") == 0)
++    ck = AUDIT25MOD2_COOKIE;
++  else if (strcmp (l_name, TEST_MOD "mod3.so") == 0)
++    ck = AUDIT25MOD3_COOKIE;
++  else if (strcmp (l_name, TEST_MOD "mod4.so") == 0)
++    ck = AUDIT25MOD4_COOKIE;
++  else if (strncmp (l_name, TEST_NAME, strlen (TEST_NAME)) == 0)
++    ck = AUDIT25_COOKIE;
++
++  *cookie = ck;
++  return ck == -1 ? 0 : LA_FLG_BINDFROM | LA_FLG_BINDTO;
++}
++
++#if __ELF_NATIVE_CLASS == 64
++uintptr_t
++la_symbind64 (Elf64_Sym *sym, unsigned int ndx,
++	      uintptr_t *refcook, uintptr_t *defcook,
++	      unsigned int *flags, const char *symname)
++#else
++uintptr_t
++la_symbind32 (Elf32_Sym *sym, unsigned int ndx,
++	      uintptr_t *refcook, uintptr_t *defcook,
++	      unsigned int *flags, const char *symname)
++#endif
++{
++  if (*refcook != -1 && *defcook != -1)
++    fprintf (stderr, "la_symbind: %s %u\n", symname,
++	     *flags & (LA_SYMB_NOPLTENTER | LA_SYMB_NOPLTEXIT) ? 1 : 0);
++  return sym->st_value;
++}
+diff --git a/sysdeps/generic/dl-lookupcfg.h b/sysdeps/generic/dl-lookupcfg.h
+index c038c31ce6550059..a15fd32771d42f2a 100644
+--- a/sysdeps/generic/dl-lookupcfg.h
++++ b/sysdeps/generic/dl-lookupcfg.h
+@@ -26,3 +26,6 @@
+ #define DL_FIXUP_VALUE_CODE_ADDR(value) (value)
+ #define DL_FIXUP_VALUE_ADDR(value) (value)
+ #define DL_FIXUP_ADDR_VALUE(addr) (addr)
++#define DL_FIXUP_BINDNOW_ADDR_VALUE(addr) (addr)
++#define DL_FIXUP_BINDNOW_RELOC(value, new_value, st_value) \
++  (*value) = st_value;
+diff --git a/sysdeps/generic/ldsodefs.h b/sysdeps/generic/ldsodefs.h
+index a56060d0204cc453..a38de94bf7ea8e93 100644
+--- a/sysdeps/generic/ldsodefs.h
++++ b/sysdeps/generic/ldsodefs.h
+@@ -1403,7 +1403,10 @@ void _dl_audit_objclose (struct link_map *l)
+ /* Call the la_preinit from the audit modules for the link_map L.  */
+ void _dl_audit_preinit (struct link_map *l);
+
+-/* Call the la_symbind{32,64} from the audit modules for the link_map L.  */
++/* Call the la_symbind{32,64} from the audit modules for the link_map L.  If
++   RELOC_RESULT is NULL it assumes the symbol to be bind-now and will set
++   the flags with LA_SYMB_NOPLTENTER | LA_SYMB_NOPLTEXIT prior to calling
++   la_symbind{32,64}.
*/ + void _dl_audit_symbind (struct link_map *l, struct reloc_result *reloc_result, + const ElfW(Sym) *defsym, DL_FIXUP_VALUE_TYPE *value, + lookup_t result) +diff --git a/sysdeps/hppa/dl-lookupcfg.h b/sysdeps/hppa/dl-lookupcfg.h +index 2f6991aa16e87a00..f4f00714fa158e18 100644 +--- a/sysdeps/hppa/dl-lookupcfg.h ++++ b/sysdeps/hppa/dl-lookupcfg.h +@@ -81,3 +81,6 @@ void attribute_hidden _dl_unmap (struct link_map *map); + #define DL_FIXUP_VALUE_CODE_ADDR(value) ((value).ip) + #define DL_FIXUP_VALUE_ADDR(value) ((uintptr_t) &(value)) + #define DL_FIXUP_ADDR_VALUE(addr) (*(struct fdesc *) (addr)) ++#define DL_FIXUP_BINDNOW_ADDR_VALUE(addr) (addr) ++#define DL_FIXUP_BINDNOW_RELOC(value, new_value, st_value) \ ++ (*value) = *(struct fdesc *) (st_value) +diff --git a/sysdeps/ia64/dl-lookupcfg.h b/sysdeps/ia64/dl-lookupcfg.h +index 58ca32424b08aaf4..2b8b2fa5db9d7093 100644 +--- a/sysdeps/ia64/dl-lookupcfg.h ++++ b/sysdeps/ia64/dl-lookupcfg.h +@@ -74,3 +74,6 @@ extern void attribute_hidden _dl_unmap (struct link_map *map); + + #define DL_FIXUP_VALUE_ADDR(value) ((uintptr_t) &(value)) + #define DL_FIXUP_ADDR_VALUE(addr) (*(struct fdesc *) (addr)) ++#define DL_FIXUP_BINDNOW_ADDR_VALUE(addr) (addr) ++#define DL_FIXUP_BINDNOW_RELOC(value, new_value, st_value) \ ++ (*value) = *(struct fdesc *) (st_value) +diff --git a/sysdeps/powerpc/dl-lookupcfg.h b/sysdeps/powerpc/dl-lookupcfg.h +new file mode 100644 +index 0000000000000000..25abcc1d12b15bfc +--- /dev/null ++++ b/sysdeps/powerpc/dl-lookupcfg.h +@@ -0,0 +1,39 @@ ++/* Configuration of lookup functions. PowerPC version. ++ Copyright (C) 2022 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#define DL_FIXUP_VALUE_TYPE ElfW(Addr) ++#define DL_FIXUP_MAKE_VALUE(map, addr) (addr) ++#define DL_FIXUP_VALUE_CODE_ADDR(value) (value) ++#define DL_FIXUP_VALUE_ADDR(value) (value) ++#define DL_FIXUP_ADDR_VALUE(addr) (addr) ++#if __WORDSIZE == 64 && _CALL_ELF == 1 ++/* We need to correctly set the audit modules value for bind-now. 
*/
++# define DL_FIXUP_BINDNOW_ADDR_VALUE(addr) \
++   (((Elf64_FuncDesc *)(addr))->fd_func)
++# define DL_FIXUP_BINDNOW_RELOC(value, new_value, st_value) \
++  ({ \
++     Elf64_FuncDesc *opd = (Elf64_FuncDesc *) (value); \
++     opd->fd_func = (st_value); \
++     if ((new_value) != (uintptr_t) (st_value)) \
++       opd->fd_toc = ((Elf64_FuncDesc *)(new_value))->fd_toc; \
++   })
++#else
++# define DL_FIXUP_BINDNOW_ADDR_VALUE(addr) (addr)
++# define DL_FIXUP_BINDNOW_RELOC(value, new_value, st_value) \
++  (*value) = st_value;
++#endif
diff --git a/SOURCES/glibc-upstream-2.34-157.patch b/SOURCES/glibc-upstream-2.34-157.patch
new file mode 100644
index 0000000..0f5fd3c
--- /dev/null
+++ b/SOURCES/glibc-upstream-2.34-157.patch
@@ -0,0 +1,1042 @@
+commit b118bce87a7c581cb2d6d7698eb582e45aed3481
+Author: Ben Woodard
+Date: Mon Jan 24 10:46:18 2022 -0300
+
+    elf: Fix runtime linker auditing on aarch64 (BZ #26643)
+
+    The rtld audit support shows two problems on aarch64:
+
+    1. _dl_runtime_resolve does not preserve x8, the indirect result
+       location register, which might generate wrong results for calls,
+       depending on the function signature.
+
+    2. The NEON Q registers pushed onto the stack by _dl_runtime_resolve
+       were twice the size of the D registers extracted from the stack
+       frame by _dl_runtime_profile.
+
+    While 2. might result in wrong information being passed to the PLT
+    tracing, 1. generates wrong runtime behaviour.
+
+    The aarch64 rtld audit support is changed to:
+
+    * Both La_aarch64_regs and La_aarch64_retval are expanded to include
+      both x8 and the full sized NEON V registers, as defined by the
+      ABI.
+
+    * _dl_runtime_profile is changed to extract the registers saved by
+      _dl_runtime_resolve and put them into the new, correctly sized
+      La_aarch64_regs structure.
+
+    * The LAV_CURRENT check is changed to accept only new audit modules,
+      to avoid the undefined behavior of not saving/restoring x8.
+
+    * Unlike on other architectures, audit modules older than
+      LAV_CURRENT are rejected (both La_aarch64_regs and La_aarch64_retval
+      changed their layout and there is no requirement to support multiple
+      audit interfaces with the inherent aarch64 issues).
+
+    * A new field is also reserved in both La_aarch64_regs and
+      La_aarch64_retval to support variant PCS symbols.
+
+    Similar to x86, a new La_aarch64_vector type to represent a NEON
+    register is added to La_aarch64_regs (so each type can be accessed
+    directly).
+
+    Since LAV_CURRENT was already bumped to support bind-now, there is
+    no need to increase it again.
+
+    Checked on aarch64-linux-gnu.
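An illustrative aside on the x8 problem described above — this is our sketch, not part of the patch or the upstream commit, and the struct and function names are invented. Under the AAPCS64, an aggregate larger than 16 bytes is returned through memory: the caller allocates the result buffer and passes its address in x8, which is not preserved across calls.

    /* Hypothetical example; not glibc code.  */
    struct small { long a, b; };   /* 16 bytes: returned in x0/x1.  */
    struct big { long v[4]; };     /* 32 bytes: returned through the buffer
                                      whose address the caller puts in x8.  */

    struct big
    make_big (void)
    {
      /* If make_big is resolved lazily and _dl_runtime_resolve clobbers x8,
         this result is stored through whatever junk the stub left in x8.  */
      return (struct big) { { 1, 2, 3, 4 } };
    }

This is exactly the failure mode the commit message calls wrong runtime behaviour, and the reason audit modules built against the old La_aarch64_regs layout are now rejected outright.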
+ + Co-authored-by: Adhemerval Zanella + Reviewed-by: Szabolcs Nagy + Reviewed-by: Carlos O'Donell + Tested-by: Carlos O'Donell + (cherry picked from commit ce9a68c57c260c8417afc93972849ac9ad243ec4) + + Resolved conflicts: + NEWS + elf/rtld.c + +diff --git a/elf/rtld.c b/elf/rtld.c +index 26c6fb6479c9008c..434fbeddd5cce74d 100644 +--- a/elf/rtld.c ++++ b/elf/rtld.c +@@ -51,6 +51,7 @@ + #include + #include + #include ++#include + + #include + +@@ -991,7 +992,7 @@ file=%s [%lu]; audit interface function la_version returned zero; ignored.\n", + return; + } + +- if (lav > LAV_CURRENT) ++ if (!_dl_audit_check_version (lav)) + { + _dl_debug_printf ("\ + ERROR: audit interface '%s' requires version %d (maximum supported version %d); ignored.\n", +diff --git a/sysdeps/aarch64/Makefile b/sysdeps/aarch64/Makefile +index 7c66fb97aa065f99..7183895d04ea8d42 100644 +--- a/sysdeps/aarch64/Makefile ++++ b/sysdeps/aarch64/Makefile +@@ -10,6 +10,26 @@ endif + + ifeq ($(subdir),elf) + sysdep-dl-routines += dl-bti ++ ++tests += tst-audit26 \ ++ tst-audit27 ++ ++modules-names += \ ++ tst-audit26mod \ ++ tst-auditmod26 \ ++ tst-audit27mod \ ++ tst-auditmod27 ++ ++$(objpfx)tst-audit26: $(objpfx)tst-audit26mod.so \ ++ $(objpfx)tst-auditmod26.so ++LDFLAGS-tst-audit26 += -Wl,-z,lazy ++tst-audit26-ENV = LD_AUDIT=$(objpfx)tst-auditmod26.so ++ ++$(objpfx)tst-audit27: $(objpfx)tst-audit27mod.so \ ++ $(objpfx)tst-auditmod27.so ++$(objpfx)tst-audit27mod.so: $(libsupport) ++LDFLAGS-tst-audit27 += -Wl,-z,lazy ++tst-audit27-ENV = LD_AUDIT=$(objpfx)tst-auditmod27.so + endif + + ifeq ($(subdir),elf) +diff --git a/sysdeps/aarch64/bits/link.h b/sysdeps/aarch64/bits/link.h +index 774bbe5f4544f559..c64726947c9addea 100644 +--- a/sysdeps/aarch64/bits/link.h ++++ b/sysdeps/aarch64/bits/link.h +@@ -20,23 +20,31 @@ + # error "Never include directly; use instead." + #endif + ++typedef union ++{ ++ float s; ++ double d; ++ long double q; ++} La_aarch64_vector; ++ + /* Registers for entry into PLT on AArch64. */ + typedef struct La_aarch64_regs + { +- uint64_t lr_xreg[8]; +- uint64_t lr_dreg[8]; +- uint64_t lr_sp; +- uint64_t lr_lr; ++ uint64_t lr_xreg[9]; ++ La_aarch64_vector lr_vreg[8]; ++ uint64_t lr_sp; ++ uint64_t lr_lr; ++ void *lr_vpcs; + } La_aarch64_regs; + + /* Return values for calls from PLT on AArch64. */ + typedef struct La_aarch64_retval + { +- /* Up to two integer registers can be used for a return value. */ +- uint64_t lrv_xreg[2]; +- /* Up to four D registers can be used for a return value. */ +- uint64_t lrv_dreg[4]; +- ++ /* Up to eight integer registers can be used for a return value. */ ++ uint64_t lrv_xreg[8]; ++ /* Up to eight V registers can be used for a return value. */ ++ La_aarch64_vector lrv_vreg[8]; ++ void *lrv_vpcs; + } La_aarch64_retval; + __BEGIN_DECLS + +diff --git a/sysdeps/aarch64/dl-audit-check.h b/sysdeps/aarch64/dl-audit-check.h +new file mode 100644 +index 0000000000000000..e324339a1d4abec3 +--- /dev/null ++++ b/sysdeps/aarch64/dl-audit-check.h +@@ -0,0 +1,28 @@ ++/* rtld-audit version check. AArch64 version. ++ Copyright (C) 2022 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. 
++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++static inline bool ++_dl_audit_check_version (unsigned int lav) ++{ ++ /* Audit version 1 do not save x8 or NEON registers, which required ++ changing La_aarch64_regs and La_aarch64_retval layout (BZ#26643). The ++ missing indirect result save/restore makes _dl_runtime_profile ++ potentially trigger undefined behavior if the function returns a large ++ struct (even when PLT trace is not requested). */ ++ return lav == LAV_CURRENT; ++} +diff --git a/sysdeps/aarch64/dl-link.sym b/sysdeps/aarch64/dl-link.sym +index d67d28b40ce7d4ff..cb4dcdcbed0db492 100644 +--- a/sysdeps/aarch64/dl-link.sym ++++ b/sysdeps/aarch64/dl-link.sym +@@ -7,9 +7,11 @@ DL_SIZEOF_RG sizeof(struct La_aarch64_regs) + DL_SIZEOF_RV sizeof(struct La_aarch64_retval) + + DL_OFFSET_RG_X0 offsetof(struct La_aarch64_regs, lr_xreg) +-DL_OFFSET_RG_D0 offsetof(struct La_aarch64_regs, lr_dreg) ++DL_OFFSET_RG_V0 offsetof(struct La_aarch64_regs, lr_vreg) + DL_OFFSET_RG_SP offsetof(struct La_aarch64_regs, lr_sp) + DL_OFFSET_RG_LR offsetof(struct La_aarch64_regs, lr_lr) ++DL_OFFSET_RG_VPCS offsetof(struct La_aarch64_regs, lr_vpcs) + + DL_OFFSET_RV_X0 offsetof(struct La_aarch64_retval, lrv_xreg) +-DL_OFFSET_RV_D0 offsetof(struct La_aarch64_retval, lrv_dreg) ++DL_OFFSET_RV_V0 offsetof(struct La_aarch64_retval, lrv_vreg) ++DL_OFFSET_RV_VPCS offsetof(struct La_aarch64_retval, lrv_vpcs) +diff --git a/sysdeps/aarch64/dl-trampoline.S b/sysdeps/aarch64/dl-trampoline.S +index 9b352b1d0f7d62e7..457570e7df5148c0 100644 +--- a/sysdeps/aarch64/dl-trampoline.S ++++ b/sysdeps/aarch64/dl-trampoline.S +@@ -45,7 +45,8 @@ _dl_runtime_resolve: + + cfi_rel_offset (lr, 8) + +- /* Save arguments. */ ++ /* Note: Saving x9 is not required by the ABI but the assembler requires ++ the immediate values of operand 3 to be a multiple of 16 */ + stp x8, x9, [sp, #-(80+8*16)]! + cfi_adjust_cfa_offset (80+8*16) + cfi_rel_offset (x8, 0) +@@ -142,7 +143,7 @@ _dl_runtime_profile: + Stack frame layout: + [sp, #...] lr + [sp, #...] 
&PLTGOT[n] +- [sp, #96] La_aarch64_regs ++ [sp, #256] La_aarch64_regs + [sp, #48] La_aarch64_retval + [sp, #40] frame size return from pltenter + [sp, #32] dl_profile_call saved x1 +@@ -183,19 +184,25 @@ _dl_runtime_profile: + stp x6, x7, [x29, #OFFSET_RG + DL_OFFSET_RG_X0 + 16*3] + cfi_rel_offset (x6, OFFSET_RG + DL_OFFSET_RG_X0 + 16*3 + 0) + cfi_rel_offset (x7, OFFSET_RG + DL_OFFSET_RG_X0 + 16*3 + 8) +- +- stp d0, d1, [X29, #OFFSET_RG + DL_OFFSET_RG_D0 + 16*0] +- cfi_rel_offset (d0, OFFSET_RG + DL_OFFSET_RG_D0 + 16*0) +- cfi_rel_offset (d1, OFFSET_RG + DL_OFFSET_RG_D0 + 16*0 + 8) +- stp d2, d3, [X29, #OFFSET_RG+ DL_OFFSET_RG_D0 + 16*1] +- cfi_rel_offset (d2, OFFSET_RG + DL_OFFSET_RG_D0 + 16*1 + 0) +- cfi_rel_offset (d3, OFFSET_RG + DL_OFFSET_RG_D0 + 16*1 + 8) +- stp d4, d5, [X29, #OFFSET_RG + DL_OFFSET_RG_D0 + 16*2] +- cfi_rel_offset (d4, OFFSET_RG + DL_OFFSET_RG_D0 + 16*2 + 0) +- cfi_rel_offset (d5, OFFSET_RG + DL_OFFSET_RG_D0 + 16*2 + 8) +- stp d6, d7, [X29, #OFFSET_RG + DL_OFFSET_RG_D0 + 16*3] +- cfi_rel_offset (d6, OFFSET_RG + DL_OFFSET_RG_D0 + 16*3 + 0) +- cfi_rel_offset (d7, OFFSET_RG + DL_OFFSET_RG_D0 + 16*3 + 8) ++ str x8, [x29, #OFFSET_RG + DL_OFFSET_RG_X0 + 16*4 + 0] ++ cfi_rel_offset (x8, OFFSET_RG + DL_OFFSET_RG_X0 + 16*4 + 0) ++ /* Note 8 bytes of padding is in the stack frame for alignment */ ++ ++ stp q0, q1, [X29, #OFFSET_RG + DL_OFFSET_RG_V0 + 32*0] ++ cfi_rel_offset (q0, OFFSET_RG + DL_OFFSET_RG_V0 + 32*0) ++ cfi_rel_offset (q1, OFFSET_RG + DL_OFFSET_RG_V0 + 32*0 + 16) ++ stp q2, q3, [X29, #OFFSET_RG+ DL_OFFSET_RG_V0 + 32*1] ++ cfi_rel_offset (q2, OFFSET_RG + DL_OFFSET_RG_V0 + 32*1 + 0) ++ cfi_rel_offset (q3, OFFSET_RG + DL_OFFSET_RG_V0 + 32*1 + 16) ++ stp q4, q5, [X29, #OFFSET_RG + DL_OFFSET_RG_V0 + 32*2] ++ cfi_rel_offset (q4, OFFSET_RG + DL_OFFSET_RG_V0 + 32*2 + 0) ++ cfi_rel_offset (q5, OFFSET_RG + DL_OFFSET_RG_V0 + 32*2 + 16) ++ stp q6, q7, [X29, #OFFSET_RG + DL_OFFSET_RG_V0 + 32*3] ++ cfi_rel_offset (q6, OFFSET_RG + DL_OFFSET_RG_V0 + 32*3 + 0) ++ cfi_rel_offset (q7, OFFSET_RG + DL_OFFSET_RG_V0 + 32*3 + 16) ++ ++ /* No APCS extension supported. 
*/ ++ str xzr, [X29, #OFFSET_RG + DL_OFFSET_RG_VPCS] + + add x0, x29, #SF_SIZE + 16 + ldr x1, [x29, #OFFSET_LR] +@@ -234,10 +241,11 @@ _dl_runtime_profile: + ldp x2, x3, [x29, #OFFSET_RG + DL_OFFSET_RG_X0 + 16*1] + ldp x4, x5, [x29, #OFFSET_RG + DL_OFFSET_RG_X0 + 16*2] + ldp x6, x7, [x29, #OFFSET_RG + DL_OFFSET_RG_X0 + 16*3] +- ldp d0, d1, [x29, #OFFSET_RG + DL_OFFSET_RG_D0 + 16*0] +- ldp d2, d3, [x29, #OFFSET_RG + DL_OFFSET_RG_D0 + 16*1] +- ldp d4, d5, [x29, #OFFSET_RG + DL_OFFSET_RG_D0 + 16*2] +- ldp d6, d7, [x29, #OFFSET_RG + DL_OFFSET_RG_D0 + 16*3] ++ ldr x8, [x29, #OFFSET_RG + DL_OFFSET_RG_X0 + 16*4] ++ ldp q0, q1, [x29, #OFFSET_RG + DL_OFFSET_RG_V0 + 32*0] ++ ldp q2, q3, [x29, #OFFSET_RG + DL_OFFSET_RG_V0 + 32*1] ++ ldp q4, q5, [x29, #OFFSET_RG + DL_OFFSET_RG_V0 + 32*2] ++ ldp q6, q7, [x29, #OFFSET_RG + DL_OFFSET_RG_V0 + 32*3] + + cfi_def_cfa_register (sp) + ldp x29, x30, [x29, #0] +@@ -280,14 +288,22 @@ _dl_runtime_profile: + ldp x2, x3, [x29, #OFFSET_RG + DL_OFFSET_RG_X0 + 16*1] + ldp x4, x5, [x29, #OFFSET_RG + DL_OFFSET_RG_X0 + 16*2] + ldp x6, x7, [x29, #OFFSET_RG + DL_OFFSET_RG_X0 + 16*3] +- ldp d0, d1, [x29, #OFFSET_RG + DL_OFFSET_RG_D0 + 16*0] +- ldp d2, d3, [x29, #OFFSET_RG + DL_OFFSET_RG_D0 + 16*1] +- ldp d4, d5, [x29, #OFFSET_RG + DL_OFFSET_RG_D0 + 16*2] +- ldp d6, d7, [x29, #OFFSET_RG + DL_OFFSET_RG_D0 + 16*3] ++ ldr x8, [x29, #OFFSET_RG + DL_OFFSET_RG_X0 + 16*4] ++ ldp q0, q1, [x29, #OFFSET_RG + DL_OFFSET_RG_V0 + 32*0] ++ ldp q2, q3, [x29, #OFFSET_RG + DL_OFFSET_RG_V0 + 32*1] ++ ldp q4, q5, [x29, #OFFSET_RG + DL_OFFSET_RG_V0 + 32*2] ++ ldp q6, q7, [x29, #OFFSET_RG + DL_OFFSET_RG_V0 + 32*3] + blr ip0 +- stp x0, x1, [x29, #OFFSET_RV + DL_OFFSET_RV_X0] +- stp d0, d1, [x29, #OFFSET_RV + DL_OFFSET_RV_D0 + 16*0] +- stp d2, d3, [x29, #OFFSET_RV + DL_OFFSET_RV_D0 + 16*1] ++ stp x0, x1, [x29, #OFFSET_RV + DL_OFFSET_RV_X0 + 16*0] ++ stp x2, x3, [x29, #OFFSET_RV + DL_OFFSET_RV_X0 + 16*1] ++ stp x4, x5, [x29, #OFFSET_RV + DL_OFFSET_RV_X0 + 16*2] ++ stp x6, x7, [x29, #OFFSET_RV + DL_OFFSET_RV_X0 + 16*3] ++ str x8, [x29, #OFFSET_RG + DL_OFFSET_RG_X0 + 16*4] ++ stp q0, q1, [x29, #OFFSET_RV + DL_OFFSET_RV_V0 + 32*0] ++ stp q2, q3, [x29, #OFFSET_RV + DL_OFFSET_RV_V0 + 32*1] ++ stp q4, q5, [x29, #OFFSET_RV + DL_OFFSET_RV_V0 + 32*2] ++ stp q6, q7, [x29, #OFFSET_RV + DL_OFFSET_RV_V0 + 32*3] ++ str xzr, [X29, #OFFSET_RV + DL_OFFSET_RG_VPCS] + + /* Setup call to pltexit */ + ldp x0, x1, [x29, #OFFSET_SAVED_CALL_X0] +@@ -295,9 +311,16 @@ _dl_runtime_profile: + add x3, x29, #OFFSET_RV + bl _dl_audit_pltexit + +- ldp x0, x1, [x29, #OFFSET_RV + DL_OFFSET_RV_X0] +- ldp d0, d1, [x29, #OFFSET_RV + DL_OFFSET_RV_D0 + 16*0] +- ldp d2, d3, [x29, #OFFSET_RV + DL_OFFSET_RV_D0 + 16*1] ++ ldp x0, x1, [x29, #OFFSET_RV + DL_OFFSET_RV_X0 + 16*0] ++ ldp x2, x3, [x29, #OFFSET_RV + DL_OFFSET_RV_X0 + 16*1] ++ ldp x4, x5, [x29, #OFFSET_RV + DL_OFFSET_RV_X0 + 16*2] ++ ldp x6, x7, [x29, #OFFSET_RV + DL_OFFSET_RV_X0 + 16*3] ++ ldr x8, [x29, #OFFSET_RV + DL_OFFSET_RV_X0 + 16*4] ++ ldp q0, q1, [x29, #OFFSET_RV + DL_OFFSET_RV_V0 + 32*0] ++ ldp q2, q3, [x29, #OFFSET_RV + DL_OFFSET_RV_V0 + 32*1] ++ ldp q4, q5, [x29, #OFFSET_RV + DL_OFFSET_RV_V0 + 32*2] ++ ldp q6, q7, [x29, #OFFSET_RV + DL_OFFSET_RV_V0 + 32*3] ++ + /* LR from within La_aarch64_reg */ + ldr lr, [x29, #OFFSET_RG + DL_OFFSET_RG_LR] + cfi_restore(lr) +diff --git a/sysdeps/aarch64/tst-audit26.c b/sysdeps/aarch64/tst-audit26.c +new file mode 100644 +index 0000000000000000..46de8acd219cb8bc +--- /dev/null ++++ b/sysdeps/aarch64/tst-audit26.c +@@ -0,0 +1,37 @@ ++/* 
Check LD_AUDIT for aarch64 ABI specifics. ++ Copyright (C) 2022 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++#include ++#include ++#include "tst-audit26mod.h" ++ ++int ++do_test (void) ++{ ++ /* Returning a large struct uses 'x8' as indirect result location. */ ++ struct large_struct r = tst_audit26_func (ARG1, ARG2, ARG3); ++ ++ struct large_struct e = set_large_struct (ARG1, ARG2, ARG3); ++ ++ TEST_COMPARE_BLOB (r.a, sizeof (r.a), e.a, sizeof (e.a)); ++ ++ return 0; ++} ++ ++#include +diff --git a/sysdeps/aarch64/tst-audit26mod.c b/sysdeps/aarch64/tst-audit26mod.c +new file mode 100644 +index 0000000000000000..67d5ffce7288b34c +--- /dev/null ++++ b/sysdeps/aarch64/tst-audit26mod.c +@@ -0,0 +1,33 @@ ++/* Check LD_AUDIT for aarch64 ABI specifics. ++ Copyright (C) 2022 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++#include "tst-audit26mod.h" ++ ++struct large_struct ++tst_audit26_func (char a, short b, long int c) ++{ ++ if (a != ARG1) ++ abort (); ++ if (b != ARG2) ++ abort (); ++ if (c != ARG3) ++ abort (); ++ ++ return set_large_struct (a, b, c); ++} +diff --git a/sysdeps/aarch64/tst-audit26mod.h b/sysdeps/aarch64/tst-audit26mod.h +new file mode 100644 +index 0000000000000000..f80409f96bae6c82 +--- /dev/null ++++ b/sysdeps/aarch64/tst-audit26mod.h +@@ -0,0 +1,50 @@ ++/* Check LD_AUDIT for aarch64 specific ABI. ++ Copyright (C) 2022 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . 
*/ ++ ++#ifndef _TST_AUDIT27MOD_H ++#define _TST_AUDIT27MOD_H 1 ++ ++#include ++ ++struct large_struct ++{ ++ char a[16]; ++ short b[8]; ++ long int c[4]; ++}; ++ ++static inline struct large_struct ++set_large_struct (char a, short b, long int c) ++{ ++ struct large_struct r; ++ for (int i = 0; i < array_length (r.a); i++) ++ r.a[i] = a; ++ for (int i = 0; i < array_length (r.b); i++) ++ r.b[i] = b; ++ for (int i = 0; i < array_length (r.c); i++) ++ r.c[i] = c; ++ return r; ++} ++ ++#define ARG1 0x12 ++#define ARG2 0x1234 ++#define ARG3 0x12345678 ++ ++struct large_struct tst_audit26_func (char a, short b, long int c); ++ ++#endif +diff --git a/sysdeps/aarch64/tst-audit27.c b/sysdeps/aarch64/tst-audit27.c +new file mode 100644 +index 0000000000000000..5ebc09771f845af0 +--- /dev/null ++++ b/sysdeps/aarch64/tst-audit27.c +@@ -0,0 +1,64 @@ ++/* Check LD_AUDIT for aarch64 ABI specifics. ++ Copyright (C) 2022 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++#include ++#include ++#include "tst-audit27mod.h" ++ ++int ++do_test (void) ++{ ++ { ++ float r = tst_audit27_func_float (FUNC_FLOAT_ARG0, FUNC_FLOAT_ARG1, ++ FUNC_FLOAT_ARG2, FUNC_FLOAT_ARG3, ++ FUNC_FLOAT_ARG4, FUNC_FLOAT_ARG5, ++ FUNC_FLOAT_ARG6, FUNC_FLOAT_ARG7); ++ if (r != FUNC_FLOAT_RET) ++ FAIL_EXIT1 ("tst_audit27_func_float() returned %a, expected %a", ++ r, FUNC_FLOAT_RET); ++ } ++ ++ { ++ double r = tst_audit27_func_double (FUNC_DOUBLE_ARG0, FUNC_DOUBLE_ARG1, ++ FUNC_DOUBLE_ARG2, FUNC_DOUBLE_ARG3, ++ FUNC_DOUBLE_ARG4, FUNC_DOUBLE_ARG5, ++ FUNC_DOUBLE_ARG6, FUNC_DOUBLE_ARG7); ++ if (r != FUNC_DOUBLE_RET) ++ FAIL_EXIT1 ("tst_audit27_func_double() returned %la, expected %la", ++ r, FUNC_DOUBLE_RET); ++ } ++ ++ { ++ long double r = tst_audit27_func_ldouble (FUNC_LDOUBLE_ARG0, ++ FUNC_LDOUBLE_ARG1, ++ FUNC_LDOUBLE_ARG2, ++ FUNC_LDOUBLE_ARG3, ++ FUNC_LDOUBLE_ARG4, ++ FUNC_LDOUBLE_ARG5, ++ FUNC_LDOUBLE_ARG6, ++ FUNC_LDOUBLE_ARG7); ++ if (r != FUNC_LDOUBLE_RET) ++ FAIL_EXIT1 ("tst_audit27_func_ldouble() returned %La, expected %La", ++ r, FUNC_LDOUBLE_RET); ++ } ++ ++ return 0; ++} ++ ++#include +diff --git a/sysdeps/aarch64/tst-audit27mod.c b/sysdeps/aarch64/tst-audit27mod.c +new file mode 100644 +index 0000000000000000..922b518f0af4b97b +--- /dev/null ++++ b/sysdeps/aarch64/tst-audit27mod.c +@@ -0,0 +1,95 @@ ++/* Check LD_AUDIT for aarch64 ABI specifics. ++ Copyright (C) 2022 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. 
++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++#include ++#include ++#include "tst-audit27mod.h" ++ ++float ++tst_audit27_func_float (float a0, float a1, float a2, float a3, float a4, ++ float a5, float a6, float a7) ++{ ++ if (a0 != FUNC_FLOAT_ARG0) ++ FAIL_EXIT1 ("a0: %a != %a", a0, FUNC_FLOAT_ARG0); ++ if (a1 != FUNC_FLOAT_ARG1) ++ FAIL_EXIT1 ("a1: %a != %a", a1, FUNC_FLOAT_ARG1); ++ if (a2 != FUNC_FLOAT_ARG2) ++ FAIL_EXIT1 ("a2: %a != %a", a2, FUNC_FLOAT_ARG2); ++ if (a3 != FUNC_FLOAT_ARG3) ++ FAIL_EXIT1 ("a3: %a != %a", a3, FUNC_FLOAT_ARG3); ++ if (a4 != FUNC_FLOAT_ARG4) ++ FAIL_EXIT1 ("a4: %a != %a", a4, FUNC_FLOAT_ARG4); ++ if (a5 != FUNC_FLOAT_ARG5) ++ FAIL_EXIT1 ("a5: %a != %a", a5, FUNC_FLOAT_ARG5); ++ if (a6 != FUNC_FLOAT_ARG6) ++ FAIL_EXIT1 ("a6: %a != %a", a6, FUNC_FLOAT_ARG6); ++ if (a7 != FUNC_FLOAT_ARG7) ++ FAIL_EXIT1 ("a7: %a != %a", a7, FUNC_FLOAT_ARG7); ++ ++ return FUNC_FLOAT_RET; ++} ++ ++double ++tst_audit27_func_double (double a0, double a1, double a2, double a3, double a4, ++ double a5, double a6, double a7) ++{ ++ if (a0 != FUNC_DOUBLE_ARG0) ++ FAIL_EXIT1 ("a0: %la != %la", a0, FUNC_DOUBLE_ARG0); ++ if (a1 != FUNC_DOUBLE_ARG1) ++ FAIL_EXIT1 ("a1: %la != %la", a1, FUNC_DOUBLE_ARG1); ++ if (a2 != FUNC_DOUBLE_ARG2) ++ FAIL_EXIT1 ("a2: %la != %la", a2, FUNC_DOUBLE_ARG2); ++ if (a3 != FUNC_DOUBLE_ARG3) ++ FAIL_EXIT1 ("a3: %la != %la", a3, FUNC_DOUBLE_ARG3); ++ if (a4 != FUNC_DOUBLE_ARG4) ++ FAIL_EXIT1 ("a4: %la != %la", a4, FUNC_DOUBLE_ARG4); ++ if (a5 != FUNC_DOUBLE_ARG5) ++ FAIL_EXIT1 ("a5: %la != %la", a5, FUNC_DOUBLE_ARG5); ++ if (a6 != FUNC_DOUBLE_ARG6) ++ FAIL_EXIT1 ("a6: %la != %la", a6, FUNC_DOUBLE_ARG6); ++ if (a7 != FUNC_DOUBLE_ARG7) ++ FAIL_EXIT1 ("a7: %la != %la", a7, FUNC_DOUBLE_ARG7); ++ ++ return FUNC_DOUBLE_RET; ++} ++ ++long double ++tst_audit27_func_ldouble (long double a0, long double a1, long double a2, ++ long double a3, long double a4, long double a5, ++ long double a6, long double a7) ++{ ++ if (a0 != FUNC_LDOUBLE_ARG0) ++ FAIL_EXIT1 ("a0: %La != %La", a0, FUNC_LDOUBLE_ARG0); ++ if (a1 != FUNC_LDOUBLE_ARG1) ++ FAIL_EXIT1 ("a1: %La != %La", a1, FUNC_LDOUBLE_ARG1); ++ if (a2 != FUNC_LDOUBLE_ARG2) ++ FAIL_EXIT1 ("a2: %La != %La", a2, FUNC_LDOUBLE_ARG2); ++ if (a3 != FUNC_LDOUBLE_ARG3) ++ FAIL_EXIT1 ("a3: %La != %La", a3, FUNC_LDOUBLE_ARG3); ++ if (a4 != FUNC_LDOUBLE_ARG4) ++ FAIL_EXIT1 ("a4: %La != %La", a4, FUNC_LDOUBLE_ARG4); ++ if (a5 != FUNC_LDOUBLE_ARG5) ++ FAIL_EXIT1 ("a5: %La != %La", a5, FUNC_LDOUBLE_ARG5); ++ if (a6 != FUNC_LDOUBLE_ARG6) ++ FAIL_EXIT1 ("a6: %La != %La", a6, FUNC_LDOUBLE_ARG6); ++ if (a7 != FUNC_LDOUBLE_ARG7) ++ FAIL_EXIT1 ("a7: %La != %La", a7, FUNC_LDOUBLE_ARG7); ++ ++ return FUNC_LDOUBLE_RET; ++} +diff --git a/sysdeps/aarch64/tst-audit27mod.h b/sysdeps/aarch64/tst-audit27mod.h +new file mode 100644 +index 0000000000000000..1709d222ca251e3b +--- /dev/null ++++ b/sysdeps/aarch64/tst-audit27mod.h +@@ -0,0 +1,67 @@ ++/* Check LD_AUDIT for aarch64 specific ABI. ++ Copyright (C) 2022 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. 
++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#ifndef _TST_AUDIT27MOD_H ++#define _TST_AUDIT27MOD_H 1 ++ ++#include ++ ++#define FUNC_FLOAT_ARG0 FLT_MIN ++#define FUNC_FLOAT_ARG1 FLT_MAX ++#define FUNC_FLOAT_ARG2 FLT_EPSILON ++#define FUNC_FLOAT_ARG3 FLT_TRUE_MIN ++#define FUNC_FLOAT_ARG4 0.0f ++#define FUNC_FLOAT_ARG5 1.0f ++#define FUNC_FLOAT_ARG6 2.0f ++#define FUNC_FLOAT_ARG7 3.0f ++#define FUNC_FLOAT_RET 4.0f ++ ++float ++tst_audit27_func_float (float a0, float a1, float a2, float a3, float a4, ++ float a5, float a6, float a7); ++ ++#define FUNC_DOUBLE_ARG0 DBL_MIN ++#define FUNC_DOUBLE_ARG1 DBL_MAX ++#define FUNC_DOUBLE_ARG2 DBL_EPSILON ++#define FUNC_DOUBLE_ARG3 DBL_TRUE_MIN ++#define FUNC_DOUBLE_ARG4 0.0 ++#define FUNC_DOUBLE_ARG5 1.0 ++#define FUNC_DOUBLE_ARG6 2.0 ++#define FUNC_DOUBLE_ARG7 3.0 ++#define FUNC_DOUBLE_RET 0x1.fffffe0000001p+127 ++ ++double ++tst_audit27_func_double (double a0, double a1, double a2, double a3, double a4, ++ double a5, double a6, double a7); ++ ++#define FUNC_LDOUBLE_ARG0 DBL_MAX + 1.0L ++#define FUNC_LDOUBLE_ARG1 DBL_MAX + 2.0L ++#define FUNC_LDOUBLE_ARG2 DBL_MAX + 3.0L ++#define FUNC_LDOUBLE_ARG3 DBL_MAX + 4.0L ++#define FUNC_LDOUBLE_ARG4 DBL_MAX + 5.0L ++#define FUNC_LDOUBLE_ARG5 DBL_MAX + 6.0L ++#define FUNC_LDOUBLE_ARG6 DBL_MAX + 7.0L ++#define FUNC_LDOUBLE_ARG7 DBL_MAX + 8.0L ++#define FUNC_LDOUBLE_RET 0x1.fffffffffffff000000000000001p+1023L ++ ++long double ++tst_audit27_func_ldouble (long double a0, long double a1, long double a2, ++ long double a3, long double a4, long double a5, ++ long double a6, long double a7); ++ ++#endif +diff --git a/sysdeps/aarch64/tst-auditmod26.c b/sysdeps/aarch64/tst-auditmod26.c +new file mode 100644 +index 0000000000000000..b03b6baed9aeb528 +--- /dev/null ++++ b/sysdeps/aarch64/tst-auditmod26.c +@@ -0,0 +1,103 @@ ++/* Check LD_AUDIT for aarch64 specific ABI. ++ Copyright (C) 2022 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . 
*/ ++ ++#include ++#include ++#include ++#include ++#include ++#include "tst-audit26mod.h" ++ ++#define TEST_NAME "tst-audit26" ++ ++#define AUDIT26_COOKIE 0 ++ ++unsigned int ++la_version (unsigned int v) ++{ ++ return v; ++} ++ ++unsigned int ++la_objopen (struct link_map *map, Lmid_t lmid, uintptr_t *cookie) ++{ ++ const char *p = strrchr (map->l_name, '/'); ++ const char *l_name = p == NULL ? map->l_name : p + 1; ++ uintptr_t ck = -1; ++ if (strncmp (l_name, TEST_NAME, strlen (TEST_NAME)) == 0) ++ ck = AUDIT26_COOKIE; ++ *cookie = ck; ++ printf ("objopen: %ld, %s [cookie=%ld]\n", lmid, l_name, ck); ++ return ck == -1 ? 0 : LA_FLG_BINDFROM | LA_FLG_BINDTO; ++} ++ ++ElfW(Addr) ++la_aarch64_gnu_pltenter (ElfW(Sym) *sym __attribute__ ((unused)), ++ unsigned int ndx __attribute__ ((unused)), ++ uintptr_t *refcook, uintptr_t *defcook, ++ La_aarch64_regs *regs, unsigned int *flags, ++ const char *symname, long int *framesizep) ++{ ++ printf ("pltenter: symname=%s, st_value=%#lx, ndx=%u, flags=%u\n", ++ symname, (long int) sym->st_value, ndx, *flags); ++ ++ if (strcmp (symname, "tst_audit26_func") == 0) ++ { ++ assert (regs->lr_xreg[0] == ARG1); ++ assert (regs->lr_xreg[1] == ARG2); ++ assert (regs->lr_xreg[2] == ARG3); ++ } ++ else ++ abort (); ++ ++ assert (regs->lr_vpcs == 0); ++ ++ /* Clobber 'x8'. */ ++ asm volatile ("mov x8, -1" : : : "x8"); ++ ++ *framesizep = 1024; ++ ++ return sym->st_value; ++} ++ ++unsigned int ++la_aarch64_gnu_pltexit (ElfW(Sym) *sym, unsigned int ndx, uintptr_t *refcook, ++ uintptr_t *defcook, ++ const struct La_aarch64_regs *inregs, ++ struct La_aarch64_retval *outregs, const char *symname) ++{ ++ printf ("pltexit: symname=%s, st_value=%#lx, ndx=%u\n", ++ symname, (long int) sym->st_value, ndx); ++ ++ if (strcmp (symname, "tst_audit26_func") == 0) ++ { ++ assert (inregs->lr_xreg[0] == ARG1); ++ assert (inregs->lr_xreg[1] == ARG2); ++ assert (inregs->lr_xreg[2] == ARG3); ++ } ++ else ++ abort (); ++ ++ assert (inregs->lr_vpcs == 0); ++ assert (outregs->lrv_vpcs == 0); ++ ++ /* Clobber 'x8'. */ ++ asm volatile ("mov x8, -1" : : : "x8"); ++ ++ return 0; ++} +diff --git a/sysdeps/aarch64/tst-auditmod27.c b/sysdeps/aarch64/tst-auditmod27.c +new file mode 100644 +index 0000000000000000..21132c2985dab7b2 +--- /dev/null ++++ b/sysdeps/aarch64/tst-auditmod27.c +@@ -0,0 +1,180 @@ ++/* Check LD_AUDIT for aarch64 specific ABI. ++ Copyright (C) 2022 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include "tst-audit27mod.h" ++ ++#define TEST_NAME "tst-audit27" ++ ++#define AUDIT27_COOKIE 0 ++ ++unsigned int ++la_version (unsigned int v) ++{ ++ return v; ++} ++ ++unsigned int ++la_objopen (struct link_map *map, Lmid_t lmid, uintptr_t *cookie) ++{ ++ const char *p = strrchr (map->l_name, '/'); ++ const char *l_name = p == NULL ? 
map->l_name : p + 1; ++ uintptr_t ck = -1; ++ if (strncmp (l_name, TEST_NAME, strlen (TEST_NAME)) == 0) ++ ck = AUDIT27_COOKIE; ++ *cookie = ck; ++ printf ("objopen: %ld, %s [%ld]\n", lmid, l_name, ck); ++ return ck == -1 ? 0 : LA_FLG_BINDFROM | LA_FLG_BINDTO; ++} ++ ++ElfW(Addr) ++la_aarch64_gnu_pltenter (ElfW(Sym) *sym, unsigned int ndx, uintptr_t *refcook, ++ uintptr_t *defcook, La_aarch64_regs *regs, ++ unsigned int *flags, const char *symname, ++ long int *framesizep) ++{ ++ printf ("pltenter: symname=%s, st_value=%#lx, ndx=%u, flags=%u\n", ++ symname, (long int) sym->st_value, ndx, *flags); ++ ++ if (strcmp (symname, "tst_audit27_func_float") == 0) ++ { ++ assert (regs->lr_vreg[0].s == FUNC_FLOAT_ARG0); ++ assert (regs->lr_vreg[1].s == FUNC_FLOAT_ARG1); ++ assert (regs->lr_vreg[2].s == FUNC_FLOAT_ARG2); ++ assert (regs->lr_vreg[3].s == FUNC_FLOAT_ARG3); ++ assert (regs->lr_vreg[4].s == FUNC_FLOAT_ARG4); ++ assert (regs->lr_vreg[5].s == FUNC_FLOAT_ARG5); ++ assert (regs->lr_vreg[6].s == FUNC_FLOAT_ARG6); ++ assert (regs->lr_vreg[7].s == FUNC_FLOAT_ARG7); ++ } ++ else if (strcmp (symname, "tst_audit27_func_double") == 0) ++ { ++ assert (regs->lr_vreg[0].d == FUNC_DOUBLE_ARG0); ++ assert (regs->lr_vreg[1].d == FUNC_DOUBLE_ARG1); ++ assert (regs->lr_vreg[2].d == FUNC_DOUBLE_ARG2); ++ assert (regs->lr_vreg[3].d == FUNC_DOUBLE_ARG3); ++ assert (regs->lr_vreg[4].d == FUNC_DOUBLE_ARG4); ++ assert (regs->lr_vreg[5].d == FUNC_DOUBLE_ARG5); ++ assert (regs->lr_vreg[6].d == FUNC_DOUBLE_ARG6); ++ assert (regs->lr_vreg[7].d == FUNC_DOUBLE_ARG7); ++ } ++ else if (strcmp (symname, "tst_audit27_func_ldouble") == 0) ++ { ++ assert (regs->lr_vreg[0].q == FUNC_LDOUBLE_ARG0); ++ assert (regs->lr_vreg[1].q == FUNC_LDOUBLE_ARG1); ++ assert (regs->lr_vreg[2].q == FUNC_LDOUBLE_ARG2); ++ assert (regs->lr_vreg[3].q == FUNC_LDOUBLE_ARG3); ++ assert (regs->lr_vreg[4].q == FUNC_LDOUBLE_ARG4); ++ assert (regs->lr_vreg[5].q == FUNC_LDOUBLE_ARG5); ++ assert (regs->lr_vreg[6].q == FUNC_LDOUBLE_ARG6); ++ assert (regs->lr_vreg[7].q == FUNC_LDOUBLE_ARG7); ++ } ++ else ++ abort (); ++ ++ assert (regs->lr_vpcs == 0); ++ ++ /* Clobber the q registers on exit. 
*/ ++ uint8_t v = 0xff; ++ asm volatile ("dup v0.8b, %w0" : : "r" (v) : "v0"); ++ asm volatile ("dup v1.8b, %w0" : : "r" (v) : "v1"); ++ asm volatile ("dup v2.8b, %w0" : : "r" (v) : "v2"); ++ asm volatile ("dup v3.8b, %w0" : : "r" (v) : "v3"); ++ asm volatile ("dup v4.8b, %w0" : : "r" (v) : "v4"); ++ asm volatile ("dup v5.8b, %w0" : : "r" (v) : "v5"); ++ asm volatile ("dup v6.8b, %w0" : : "r" (v) : "v6"); ++ asm volatile ("dup v7.8b, %w0" : : "r" (v) : "v7"); ++ ++ *framesizep = 1024; ++ ++ return sym->st_value; ++} ++ ++unsigned int ++la_aarch64_gnu_pltexit (ElfW(Sym) *sym, unsigned int ndx, uintptr_t *refcook, ++ uintptr_t *defcook, ++ const struct La_aarch64_regs *inregs, ++ struct La_aarch64_retval *outregs, ++ const char *symname) ++{ ++ printf ("pltexit: symname=%s, st_value=%#lx, ndx=%u\n", ++ symname, (long int) sym->st_value, ndx); ++ ++ if (strcmp (symname, "tst_audit27_func_float") == 0) ++ { ++ assert (inregs->lr_vreg[0].s == FUNC_FLOAT_ARG0); ++ assert (inregs->lr_vreg[1].s == FUNC_FLOAT_ARG1); ++ assert (inregs->lr_vreg[2].s == FUNC_FLOAT_ARG2); ++ assert (inregs->lr_vreg[3].s == FUNC_FLOAT_ARG3); ++ assert (inregs->lr_vreg[4].s == FUNC_FLOAT_ARG4); ++ assert (inregs->lr_vreg[5].s == FUNC_FLOAT_ARG5); ++ assert (inregs->lr_vreg[6].s == FUNC_FLOAT_ARG6); ++ assert (inregs->lr_vreg[7].s == FUNC_FLOAT_ARG7); ++ ++ assert (outregs->lrv_vreg[0].s == FUNC_FLOAT_RET); ++ } ++ else if (strcmp (symname, "tst_audit27_func_double") == 0) ++ { ++ assert (inregs->lr_vreg[0].d == FUNC_DOUBLE_ARG0); ++ assert (inregs->lr_vreg[1].d == FUNC_DOUBLE_ARG1); ++ assert (inregs->lr_vreg[2].d == FUNC_DOUBLE_ARG2); ++ assert (inregs->lr_vreg[3].d == FUNC_DOUBLE_ARG3); ++ assert (inregs->lr_vreg[4].d == FUNC_DOUBLE_ARG4); ++ assert (inregs->lr_vreg[5].d == FUNC_DOUBLE_ARG5); ++ assert (inregs->lr_vreg[6].d == FUNC_DOUBLE_ARG6); ++ assert (inregs->lr_vreg[7].d == FUNC_DOUBLE_ARG7); ++ ++ assert (outregs->lrv_vreg[0].d == FUNC_DOUBLE_RET); ++ } ++ else if (strcmp (symname, "tst_audit27_func_ldouble") == 0) ++ { ++ assert (inregs->lr_vreg[0].q == FUNC_LDOUBLE_ARG0); ++ assert (inregs->lr_vreg[1].q == FUNC_LDOUBLE_ARG1); ++ assert (inregs->lr_vreg[2].q == FUNC_LDOUBLE_ARG2); ++ assert (inregs->lr_vreg[3].q == FUNC_LDOUBLE_ARG3); ++ assert (inregs->lr_vreg[4].q == FUNC_LDOUBLE_ARG4); ++ assert (inregs->lr_vreg[5].q == FUNC_LDOUBLE_ARG5); ++ assert (inregs->lr_vreg[6].q == FUNC_LDOUBLE_ARG6); ++ assert (inregs->lr_vreg[7].q == FUNC_LDOUBLE_ARG7); ++ ++ assert (outregs->lrv_vreg[0].q == FUNC_LDOUBLE_RET); ++ } ++ else ++ abort (); ++ ++ assert (inregs->lr_vpcs == 0); ++ assert (outregs->lrv_vpcs == 0); ++ ++ /* Clobber the q registers on exit. */ ++ uint8_t v = 0xff; ++ asm volatile ("dup v0.8b, %w0" : : "r" (v) : "v0"); ++ asm volatile ("dup v1.8b, %w0" : : "r" (v) : "v1"); ++ asm volatile ("dup v2.8b, %w0" : : "r" (v) : "v2"); ++ asm volatile ("dup v3.8b, %w0" : : "r" (v) : "v3"); ++ asm volatile ("dup v4.8b, %w0" : : "r" (v) : "v4"); ++ asm volatile ("dup v5.8b, %w0" : : "r" (v) : "v5"); ++ asm volatile ("dup v6.8b, %w0" : : "r" (v) : "v6"); ++ asm volatile ("dup v7.8b, %w0" : : "r" (v) : "v7"); ++ ++ return 0; ++} +diff --git a/sysdeps/generic/dl-audit-check.h b/sysdeps/generic/dl-audit-check.h +new file mode 100644 +index 0000000000000000..3ab76532868b5895 +--- /dev/null ++++ b/sysdeps/generic/dl-audit-check.h +@@ -0,0 +1,23 @@ ++/* rtld-audit version check. Generic version. ++ Copyright (C) 2022 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. 
++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++static inline bool ++_dl_audit_check_version (unsigned int lav) ++{ ++ return lav <= LAV_CURRENT; ++} diff --git a/SOURCES/glibc-upstream-2.34-158.patch b/SOURCES/glibc-upstream-2.34-158.patch new file mode 100644 index 0000000..1cb44e2 --- /dev/null +++ b/SOURCES/glibc-upstream-2.34-158.patch @@ -0,0 +1,23 @@ +commit 165e7ad459fbba2f89708fba04a55bb3981e884c +Author: Szabolcs Nagy +Date: Wed Feb 2 14:03:58 2022 +0000 + + Fix elf/tst-audit25a with default bind now toolchains + + This test relies on lazy binding for the executable so request that + explicitly in case the toolchain defaults to bind now. + + (cherry picked from commit 80a08d0faa9b224019f895800c4d97de4e23e1aa) + +diff --git a/elf/Makefile b/elf/Makefile +index 0ab3e885f5e35671..9e4e056938a75ddb 100644 +--- a/elf/Makefile ++++ b/elf/Makefile +@@ -2133,6 +2133,7 @@ $(objpfx)tst-audit25a: $(objpfx)tst-audit25mod1.so \ + $(objpfx)tst-audit25mod2.so \ + $(objpfx)tst-audit25mod3.so \ + $(objpfx)tst-audit25mod4.so ++LDFLAGS-tst-audit25a = -Wl,-z,lazy + $(objpfx)tst-audit25mod1.so: $(objpfx)tst-audit25mod3.so + LDFLAGS-tst-audit25mod1.so = -Wl,-z,now + $(objpfx)tst-audit25mod2.so: $(objpfx)tst-audit25mod4.so diff --git a/SOURCES/glibc-upstream-2.34-159.patch b/SOURCES/glibc-upstream-2.34-159.patch new file mode 100644 index 0000000..e45d205 --- /dev/null +++ b/SOURCES/glibc-upstream-2.34-159.patch @@ -0,0 +1,27 @@ +commit aabdad371f44defc6046aabdc96af7782a2e94be +Author: H.J. Lu +Date: Sun Feb 6 11:12:24 2022 -0800 + + elf: Replace tst-audit24bmod2.so with tst-audit24bmod2 + + Replace tst-audit24bmod2.so with tst-audit24bmod2 to silence: + + make[2]: Entering directory '/export/gnu/import/git/gitlab/x86-glibc/elf' + Makefile:2201: warning: overriding recipe for target '/export/build/gnu/tools-build/glibc-gitlab/build-x86_64-linux/elf/tst-audit24bmod2.so' + ../Makerules:765: warning: ignoring old recipe for target '/export/build/gnu/tools-build/glibc-gitlab/build-x86_64-linux/elf/tst-audit24bmod2.so' + + (cherry picked from commit fa7ad1df1915c8a62f50e3a5b7e10f9c7118cd7f) + +diff --git a/elf/Makefile b/elf/Makefile +index 9e4e056938a75ddb..57059293d0bc86cb 100644 +--- a/elf/Makefile ++++ b/elf/Makefile +@@ -888,7 +888,7 @@ extra-test-objs += $(addsuffix .os,$(strip $(modules-names))) + # filtmod1.so, tst-big-note-lib.so, tst-ro-dynamic-mod.so have special + # rules. 
+ modules-names-nobuild := filtmod1 tst-big-note-lib tst-ro-dynamic-mod \
+- tst-audit24bmod1 tst-audit24bmod2.so
++ tst-audit24bmod1 tst-audit24bmod2
+
+ tests += $(tests-static)
+
diff --git a/SOURCES/glibc-upstream-2.34-160.patch b/SOURCES/glibc-upstream-2.34-160.patch
new file mode 100644
index 0000000..2ce930a
--- /dev/null
+++ b/SOURCES/glibc-upstream-2.34-160.patch
@@ -0,0 +1,114 @@
+commit 4dca2d3a7b43bf99bd6a567870a3144af4e763ef
+Author: Adhemerval Zanella
+Date: Fri Feb 4 15:54:59 2022 -0300
+
+    hppa: Fix bind-now audit (BZ #28857)
+
+    On hppa, a function pointer returned by la_symbind is actually a function
+    descriptor with the plabel bit set (bit 30).  This must be cleared to get
+    the actual address of the descriptor.  If the descriptor has been bound,
+    the first word of the descriptor is the physical address of the function;
+    otherwise, the first word of the descriptor points to a trampoline in the
+    PLT.
+
+    This patch also adds a workaround in the tests because on hppa (and it
+    seems to be the only ABI where I have seen it), some shared libraries add
+    a dynamic PLT relocation with an empty symbol name:
+
+    $ readelf -r elf/tst-audit25mod1.so
+    [...]
+    Relocation section '.rela.plt' at offset 0x464 contains 6 entries:
+     Offset     Info    Type            Sym.Value  Sym. Name + Addend
+    00002008  00000081 R_PARISC_IPLT             508
+    [...]
+
+    This breaks some assumptions in the test, where a symbol with an empty
+    name ("") is passed to la_symbind.
+
+    Checked on x86_64-linux-gnu and hppa-linux-gnu.
+
+    (cherry picked from commit 9e94f57484a2aba0fe67ea2059b5843f651887c2)
+
+diff --git a/elf/Makefile b/elf/Makefile
+index 57059293d0bc86cb..3e17a0706f5ec2df 100644
+--- a/elf/Makefile
++++ b/elf/Makefile
+@@ -2116,7 +2116,7 @@ $(objpfx)tst-audit24c.out: $(objpfx)tst-auditmod24c.so
+ $(objpfx)tst-audit24c: $(objpfx)tst-audit24amod1.so \
+ 	$(objpfx)tst-audit24amod2.so
+ tst-audit24c-ENV = LD_BIND_NOW=1 LD_AUDIT=$(objpfx)tst-auditmod24c.so
+-LDFLAGS-tst-audit24b = -Wl,-z,lazy
++LDFLAGS-tst-audit24c = -Wl,-z,lazy
+
+ $(objpfx)tst-audit24d.out: $(objpfx)tst-auditmod24d.so
+ $(objpfx)tst-audit24d: $(objpfx)tst-audit24dmod1.so \
+diff --git a/elf/dl-audit.c b/elf/dl-audit.c
+index 72a50717ef60a357..ec9b032eae37c103 100644
+--- a/elf/dl-audit.c
++++ b/elf/dl-audit.c
+@@ -257,7 +257,8 @@ _dl_audit_symbind (struct link_map *l, struct reloc_result *reloc_result,
+       reloc_result->flags = flags;
+     }
+
+-  DL_FIXUP_BINDNOW_RELOC (value, new_value, sym.st_value);
++  if (flags & LA_SYMB_ALTVALUE)
++    DL_FIXUP_BINDNOW_RELOC (value, new_value, sym.st_value);
+ }
+
+ void
+diff --git a/elf/tst-auditmod24a.c b/elf/tst-auditmod24a.c
+index d8e88f3984af1707..3075dfae2fd3d288 100644
+--- a/elf/tst-auditmod24a.c
++++ b/elf/tst-auditmod24a.c
+@@ -110,5 +110,7 @@ la_symbind32 (Elf32_Sym *sym, unsigned int ndx,
+       return sym->st_value;
+     }
+
+-  abort ();
++  if (symname[0] != '\0')
++    abort ();
++  return sym->st_value;
+ }
+diff --git a/elf/tst-auditmod24d.c b/elf/tst-auditmod24d.c
+index 8c803ecc0a48f21b..badc6be451ee0357 100644
+--- a/elf/tst-auditmod24d.c
++++ b/elf/tst-auditmod24d.c
+@@ -116,5 +116,7 @@ la_symbind32 (Elf32_Sym *sym, unsigned int ndx,
+     }
+   }
+
+-  abort ();
++  if (symname[0] != '\0')
++    abort ();
++  return sym->st_value;
+ }
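To make the plabel handling above concrete, here is a minimal sketch — our illustration, not glibc code; fdesc_from_plabel is an invented helper name, and the two-word layout mirrors the struct fdesc used by the hppa port.

    #include <stdint.h>

    /* hppa function descriptor: code address plus global pointer.  */
    struct fdesc { uintptr_t ip; uintptr_t gp; };

    /* A "function pointer" with the plabel bit set (bit 30 in HP bit
       numbering, i.e. the value 2) points at a descriptor, not at code.
       Masking the bit off, as the new DL_FIXUP_ADDR_VALUE does, yields the
       descriptor; its first word holds either the resolved code address or
       the address of a PLT trampoline.  */
    static inline struct fdesc *
    fdesc_from_plabel (uintptr_t fptr)
    {
      return (struct fdesc *) (fptr & ~(uintptr_t) 2);
    }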
++  if (*refcook != -1 && *defcook != -1 && symname[0] != '\0')
+     fprintf (stderr, "la_symbind: %s %u\n", symname,
+              *flags & (LA_SYMB_NOPLTENTER | LA_SYMB_NOPLTEXIT) ? 1 : 0);
+   return sym->st_value;
+diff --git a/sysdeps/hppa/dl-lookupcfg.h b/sysdeps/hppa/dl-lookupcfg.h
+index f4f00714fa158e18..92fd0b7c844713ce 100644
+--- a/sysdeps/hppa/dl-lookupcfg.h
++++ b/sysdeps/hppa/dl-lookupcfg.h
+@@ -80,7 +80,9 @@ void attribute_hidden _dl_unmap (struct link_map *map);
+ /* Extract the code address from a fixup value */
+ #define DL_FIXUP_VALUE_CODE_ADDR(value) ((value).ip)
+ #define DL_FIXUP_VALUE_ADDR(value) ((uintptr_t) &(value))
+-#define DL_FIXUP_ADDR_VALUE(addr) (*(struct fdesc *) (addr))
++/* Clear the plabel bit to get the actual address of the descriptor. */
++#define DL_FIXUP_ADDR_VALUE(addr) \
++  (*(DL_FIXUP_VALUE_TYPE *) ((uintptr_t) (addr) & ~2))
+ #define DL_FIXUP_BINDNOW_ADDR_VALUE(addr) (addr)
+-#define DL_FIXUP_BINDNOW_RELOC(value, new_value, st_value) \
+-  (*value) = *(struct fdesc *) (st_value)
++#define DL_FIXUP_BINDNOW_RELOC(value, new_value, st_value) \
++  *(value) = *(DL_FIXUP_VALUE_TYPE *) ((uintptr_t) (new_value) & ~2)
diff --git a/SOURCES/glibc-upstream-2.34-162.patch b/SOURCES/glibc-upstream-2.34-162.patch
new file mode 100644
index 0000000..d409366
--- /dev/null
+++ b/SOURCES/glibc-upstream-2.34-162.patch
@@ -0,0 +1,242 @@
+commit 0c03cb54c808173d8e7ba96f6152dfcf627ac496
+Author: Stefan Liebler
+Date: Wed Apr 13 14:36:09 2022 +0200
+
+    S390: Add new s390 platform z16.
+
+    The new IBM z16 is added to the platform string array.
+    The macro _DL_PLATFORMS_COUNT is incremented.
+
+    _dl_hwcaps_subdir is extended by "z16" if HWCAP_S390_VXRS_PDE2
+    is set. HWCAP_S390_NNPA is not tested in _dl_hwcaps_subdirs_active
+    as those instructions may be replaced or removed in future.
+
+    tst-glibc-hwcaps.c is extended in order to test z16 via new marker5.
+
+    A fatal glibc error is dumped if glibc was built with the architecture
+    level set for z16, but run on an older machine. (See dl-hwcap-check.h)
+
+    (cherry picked from commit 2376944b9e5c0364b9fb473e4d8dabca31b57167)
+
+Conflicts:
+	sysdeps/s390/s390-64/dl-hwcap-check.h - Use GCCMACRO__ARCH__.
+	  - Backported f01d482f0355a7029d0715ace0ccf3323e7e94bc requires it.
+
+diff --git a/elf/Makefile b/elf/Makefile
+index 3e17a0706f5ec2df..8e2dd91c583f9a62 100644
+--- a/elf/Makefile
++++ b/elf/Makefile
+@@ -571,6 +571,11 @@ modules-names = \
+   libmarkermod4-2 \
+   libmarkermod4-3 \
+   libmarkermod4-4 \
++  libmarkermod5-1 \
++  libmarkermod5-2 \
++  libmarkermod5-3 \
++  libmarkermod5-4 \
++  libmarkermod5-5 \
+   ltglobmod1 \
+   ltglobmod2 \
+   neededobj1 \
+@@ -2412,6 +2417,7 @@ LDFLAGS-libmarkermod1-1.so += -Wl,-soname,libmarkermod1.so
+ LDFLAGS-libmarkermod2-1.so += -Wl,-soname,libmarkermod2.so
+ LDFLAGS-libmarkermod3-1.so += -Wl,-soname,libmarkermod3.so
+ LDFLAGS-libmarkermod4-1.so += -Wl,-soname,libmarkermod4.so
++LDFLAGS-libmarkermod5-1.so += -Wl,-soname,libmarkermod5.so
+ $(objpfx)libmarkermod%.os : markermodMARKER-VALUE.c
+ 	$(compile-command.c) \
+ 	  -DMARKER=marker$(firstword $(subst -, ,$*)) \
+@@ -2424,6 +2430,8 @@ $(objpfx)libmarkermod3.so: $(objpfx)libmarkermod3-1.so
+ 	cp $< $@
+ $(objpfx)libmarkermod4.so: $(objpfx)libmarkermod4-1.so
+ 	cp $< $@
++$(objpfx)libmarkermod5.so: $(objpfx)libmarkermod5-1.so
++	cp $< $@
+
+ # tst-glibc-hwcaps-prepend checks that --glibc-hwcaps-prepend is
+ # preferred over auto-detected subdirectories.
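For context on how the new "z16" entry takes effect at run time: the dynamic
loader probes one hwcap per glibc-hwcaps subdirectory and prefers newer
matches, as the dl-hwcaps-subdirs.c hunk later in this patch shows. The
standalone C sketch below illustrates that selection only; the HWCAP_* bit
values are placeholders (the real ones come from the kernel headers), and the
real code builds a bitmask of all active subdirectories rather than returning
a single name.

    #include <stdio.h>

    /* Placeholder hwcap bits; the real values differ on s390x.  */
    #define HWCAP_S390_VXRS_EXT2 0x1UL  /* implies z15 */
    #define HWCAP_S390_VXRS_PDE2 0x2UL  /* implies z16 */

    /* Subdirectories are searched newest first, mirroring the
       "z16:z15:z14:z13" string in dl-hwcaps-subdirs.c.  */
    static const char *const subdirs[] = { "z16", "z15", "z14", "z13" };

    /* Return the newest glibc-hwcaps subdirectory supported by HWCAP,
       or NULL if only the default search path applies.  */
    static const char *
    newest_active_subdir (unsigned long hwcap)
    {
      if (hwcap & HWCAP_S390_VXRS_PDE2)
        return subdirs[0];  /* z16 */
      if (hwcap & HWCAP_S390_VXRS_EXT2)
        return subdirs[1];  /* z15 */
      /* z14 and z13 probes elided for brevity.  */
      return NULL;
    }

    int
    main (void)
    {
      unsigned long hwcap = HWCAP_S390_VXRS_PDE2 | HWCAP_S390_VXRS_EXT2;
      const char *dir = newest_active_subdir (hwcap);
      printf ("%s\n", dir == NULL ? "(default)" : dir);  /* prints "z16" */
      return 0;
    }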
+diff --git a/elf/tst-glibc-hwcaps-cache.script b/elf/tst-glibc-hwcaps-cache.script +index c3271f61f9e50f2e..d58fc8c5de3c5198 100644 +--- a/elf/tst-glibc-hwcaps-cache.script ++++ b/elf/tst-glibc-hwcaps-cache.script +@@ -4,6 +4,7 @@ + cp $B/elf/libmarkermod2-1.so $L/libmarkermod2.so + cp $B/elf/libmarkermod3-1.so $L/libmarkermod3.so + cp $B/elf/libmarkermod4-1.so $L/libmarkermod4.so ++cp $B/elf/libmarkermod5-1.so $L/libmarkermod5.so + + mkdirp 0770 $L/glibc-hwcaps/power9 + cp $B/elf/libmarkermod2-2.so $L/glibc-hwcaps/power9/libmarkermod2.so +@@ -20,6 +21,11 @@ mkdirp 0770 $L/glibc-hwcaps/z15 + cp $B/elf/libmarkermod4-2.so $L/glibc-hwcaps/z13/libmarkermod4.so + cp $B/elf/libmarkermod4-3.so $L/glibc-hwcaps/z14/libmarkermod4.so + cp $B/elf/libmarkermod4-4.so $L/glibc-hwcaps/z15/libmarkermod4.so ++mkdirp 0770 $L/glibc-hwcaps/z16 ++cp $B/elf/libmarkermod5-2.so $L/glibc-hwcaps/z13/libmarkermod5.so ++cp $B/elf/libmarkermod5-3.so $L/glibc-hwcaps/z14/libmarkermod5.so ++cp $B/elf/libmarkermod5-4.so $L/glibc-hwcaps/z15/libmarkermod5.so ++cp $B/elf/libmarkermod5-5.so $L/glibc-hwcaps/z16/libmarkermod5.so + + mkdirp 0770 $L/glibc-hwcaps/x86-64-v2 + cp $B/elf/libmarkermod2-2.so $L/glibc-hwcaps/x86-64-v2/libmarkermod2.so +diff --git a/sysdeps/s390/dl-procinfo.c b/sysdeps/s390/dl-procinfo.c +index 155f0bd99eccb3f9..755b54ff13a0fa2f 100644 +--- a/sysdeps/s390/dl-procinfo.c ++++ b/sysdeps/s390/dl-procinfo.c +@@ -64,11 +64,12 @@ PROCINFO_CLASS const char _dl_s390_cap_flags[23][9] + #if !defined PROCINFO_DECL && defined SHARED + ._dl_s390_platforms + #else +-PROCINFO_CLASS const char _dl_s390_platforms[10][7] ++PROCINFO_CLASS const char _dl_s390_platforms[11][7] + #endif + #ifndef PROCINFO_DECL + = { +- "g5", "z900", "z990", "z9-109", "z10", "z196", "zEC12", "z13", "z14", "z15" ++ "g5", "z900", "z990", "z9-109", "z10", "z196", "zEC12", "z13", "z14", "z15", ++ "z16" + } + #endif + #if !defined SHARED || defined PROCINFO_DECL +diff --git a/sysdeps/s390/dl-procinfo.h b/sysdeps/s390/dl-procinfo.h +index e4e3e334a5b3d47c..d44e1dd97441bd90 100644 +--- a/sysdeps/s390/dl-procinfo.h ++++ b/sysdeps/s390/dl-procinfo.h +@@ -23,7 +23,7 @@ + + #define _DL_HWCAP_COUNT 23 + +-#define _DL_PLATFORMS_COUNT 10 ++#define _DL_PLATFORMS_COUNT 11 + + /* The kernel provides up to 32 capability bits with elf_hwcap. 
*/ + #define _DL_FIRST_PLATFORM 32 +diff --git a/sysdeps/s390/s390-64/Makefile b/sysdeps/s390/s390-64/Makefile +index e5da26871c862e63..66ed844e68df5159 100644 +--- a/sysdeps/s390/s390-64/Makefile ++++ b/sysdeps/s390/s390-64/Makefile +@@ -7,8 +7,11 @@ CFLAGS-rtld.c += -Wno-uninitialized -Wno-unused + CFLAGS-dl-load.c += -Wno-unused + CFLAGS-dl-reloc.c += -Wno-unused + +-$(objpfx)tst-glibc-hwcaps: $(objpfx)libmarkermod2-1.so \ +- $(objpfx)libmarkermod3-1.so $(objpfx)libmarkermod4-1.so ++$(objpfx)tst-glibc-hwcaps: \ ++ $(objpfx)libmarkermod2-1.so \ ++ $(objpfx)libmarkermod3-1.so \ ++ $(objpfx)libmarkermod4-1.so \ ++ $(objpfx)libmarkermod5-1.so + $(objpfx)tst-glibc-hwcaps.out: \ + $(objpfx)libmarkermod2.so \ + $(objpfx)glibc-hwcaps/z13/libmarkermod2.so \ +@@ -19,6 +22,11 @@ $(objpfx)tst-glibc-hwcaps.out: \ + $(objpfx)glibc-hwcaps/z13/libmarkermod4.so \ + $(objpfx)glibc-hwcaps/z14/libmarkermod4.so \ + $(objpfx)glibc-hwcaps/z15/libmarkermod4.so \ ++ $(objpfx)libmarkermod5.so \ ++ $(objpfx)glibc-hwcaps/z13/libmarkermod5.so \ ++ $(objpfx)glibc-hwcaps/z14/libmarkermod5.so \ ++ $(objpfx)glibc-hwcaps/z15/libmarkermod5.so \ ++ $(objpfx)glibc-hwcaps/z16/libmarkermod5.so + + $(objpfx)glibc-hwcaps/z13/libmarkermod2.so: $(objpfx)libmarkermod2-2.so + $(make-target-directory) +@@ -38,6 +46,19 @@ $(objpfx)glibc-hwcaps/z14/libmarkermod4.so: $(objpfx)libmarkermod4-3.so + $(objpfx)glibc-hwcaps/z15/libmarkermod4.so: $(objpfx)libmarkermod4-4.so + $(make-target-directory) + cp $< $@ ++$(objpfx)glibc-hwcaps/z13/libmarkermod5.so: $(objpfx)libmarkermod5-2.so ++ $(make-target-directory) ++ cp $< $@ ++$(objpfx)glibc-hwcaps/z14/libmarkermod5.so: $(objpfx)libmarkermod5-3.so ++ $(make-target-directory) ++ cp $< $@ ++$(objpfx)glibc-hwcaps/z15/libmarkermod5.so: $(objpfx)libmarkermod5-4.so ++ $(make-target-directory) ++ cp $< $@ ++$(objpfx)glibc-hwcaps/z16/libmarkermod5.so: $(objpfx)libmarkermod5-5.so ++ $(make-target-directory) ++ cp $< $@ ++ + + ifeq (no,$(build-hardcoded-path-in-tests)) + # This is an ld.so.cache test, and RPATH/RUNPATH in the executable +diff --git a/sysdeps/s390/s390-64/dl-hwcap-check.h b/sysdeps/s390/s390-64/dl-hwcap-check.h +index 27f7e245b1d1a9e9..52c609571b32f4ab 100644 +--- a/sysdeps/s390/s390-64/dl-hwcap-check.h ++++ b/sysdeps/s390/s390-64/dl-hwcap-check.h +@@ -26,7 +26,11 @@ static inline void + dl_hwcap_check (void) + { + #if defined __ARCH__ +-# if GCCMACRO__ARCH__ >= 13 ++# if GCCMACRO__ARCH__ >= 14 ++ if (!(GLRO(dl_hwcap) & HWCAP_S390_VXRS_PDE2)) ++ _dl_fatal_printf ("\ ++Fatal glibc error: CPU lacks VXRS_PDE2 support (z16 or later required)\n"); ++# elif GCCMACRO__ARCH__ >= 13 + if (!(GLRO(dl_hwcap) & HWCAP_S390_VXRS_EXT2)) + _dl_fatal_printf ("\ + Fatal glibc error: CPU lacks VXRS_EXT2 support (z15 or later required)\n"); +diff --git a/sysdeps/s390/s390-64/dl-hwcaps-subdirs.c b/sysdeps/s390/s390-64/dl-hwcaps-subdirs.c +index b9d094f3d73c2d7a..187d732d560c4a62 100644 +--- a/sysdeps/s390/s390-64/dl-hwcaps-subdirs.c ++++ b/sysdeps/s390/s390-64/dl-hwcaps-subdirs.c +@@ -19,8 +19,8 @@ + #include + #include + +-const char _dl_hwcaps_subdirs[] = "z15:z14:z13"; +-enum { subdirs_count = 3 }; /* Number of components in _dl_hwcaps_subdirs. */ ++const char _dl_hwcaps_subdirs[] = "z16:z15:z14:z13"; ++enum { subdirs_count = 4 }; /* Number of components in _dl_hwcaps_subdirs. */ + + uint32_t + _dl_hwcaps_subdirs_active (void) +@@ -50,5 +50,12 @@ _dl_hwcaps_subdirs_active (void) + return _dl_hwcaps_subdirs_build_bitmask (subdirs_count, active); + ++active; + ++ /* z16. 
++     Note: We do not list HWCAP_S390_NNPA here as, according to the Principles of
++     Operation, those instructions may be replaced or removed in future. */
++  if (!(GLRO (dl_hwcap) & HWCAP_S390_VXRS_PDE2))
++    return _dl_hwcaps_subdirs_build_bitmask (subdirs_count, active);
++  ++active;
++
+   return _dl_hwcaps_subdirs_build_bitmask (subdirs_count, active);
+ }
+diff --git a/sysdeps/s390/s390-64/tst-glibc-hwcaps.c b/sysdeps/s390/s390-64/tst-glibc-hwcaps.c
+index 02397a478c552516..f3b8ef3dec80d2d1 100644
+--- a/sysdeps/s390/s390-64/tst-glibc-hwcaps.c
++++ b/sysdeps/s390/s390-64/tst-glibc-hwcaps.c
+@@ -25,6 +25,7 @@
+ extern int marker2 (void);
+ extern int marker3 (void);
+ extern int marker4 (void);
++extern int marker5 (void);
+
+ /* Return the arch level, 10 for the baseline libmarkermod*.so's.  */
+ static int
+@@ -63,9 +64,11 @@ compute_level (void)
+     return 12;
+   if (strcmp (platform, "z15") == 0)
+     return 13;
++  if (strcmp (platform, "z16") == 0)
++    return 14;
+   printf ("warning: unrecognized AT_PLATFORM value: %s\n", platform);
+-  /* Assume that the new platform supports z15.  */
+-  return 13;
++  /* Assume that the new platform supports z16.  */
++  return 14;
+ }
+
+ static int
+@@ -76,6 +79,7 @@ do_test (void)
+   TEST_COMPARE (marker2 (), MIN (level - 9, 2));
+   TEST_COMPARE (marker3 (), MIN (level - 9, 3));
+   TEST_COMPARE (marker4 (), MIN (level - 9, 4));
++  TEST_COMPARE (marker5 (), MIN (level - 9, 5));
+   return 0;
+ }
+
diff --git a/SOURCES/glibc-upstream-2.34-163.patch b/SOURCES/glibc-upstream-2.34-163.patch
new file mode 100644
index 0000000..251939d
--- /dev/null
+++ b/SOURCES/glibc-upstream-2.34-163.patch
@@ -0,0 +1,834 @@
+commit 290db09546b260a30137d03ce97a857e6f15b648
+Author: Adhemerval Zanella
+Date: Wed Apr 6 12:24:42 2022 -0300
+
+    nptl: Handle spurious EINTR when thread cancellation is disabled (BZ#29029)
+
+    Some Linux interfaces never restart after being interrupted by a signal
+    handler, regardless of the use of SA_RESTART [1]. It means that for
+    pthread cancellation, if the target thread disables cancellation with
+    pthread_setcancelstate and calls such interfaces (like poll or select),
+    it should not see spurious EINTR failures due to the internal SIGCANCEL.
+
+    However, recent changes made pthread_cancel always send the internal
+    signal, regardless of the target thread's cancellation status or type.
+    To fix it, the previous semantics are restored, where the cancel signal
+    is only sent if the target thread has cancellation enabled in
+    asynchronous mode.
+
+    The cancel state and cancel type are moved back to cancelhandling,
+    and atomic operations are used to synchronize between threads. The
+    patch essentially reverts the following commits:
+
+    8c1c0aae20 nptl: Move cancel type out of cancelhandling
+    2b51742531 nptl: Move cancel state out of cancelhandling
+    26cfbb7162 nptl: Remove CANCELING_BITMASK
+
+    However, I changed the atomic operations to follow the internal C11
+    semantics and removed the macro usage, which simplifies the resulting
+    code a bit (and removes another usage of the old atomic macros).
+
+    Checked on x86_64-linux-gnu, i686-linux-gnu, aarch64-linux-gnu,
+    and powerpc64-linux-gnu.
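The sketch below condenses the bit layout and predicates that this patch
restores in nptl/descr.h into a standalone example. The bit values mirror the
descr.h hunk further down; everything else (the helper name
may_send_sigcancel, the main driver) is illustration, not glibc code.

    #include <stdbool.h>
    #include <stdio.h>

    /* Bit layout restored into the 'cancelhandling' field (values mirror
       the nptl/descr.h hunk in this patch).  */
    #define CANCELSTATE_BITMASK (1 << 0)  /* Cancellation disabled.  */
    #define CANCELTYPE_BITMASK  (1 << 1)  /* Asynchronous cancellation.  */
    #define CANCELING_BITMASK   (1 << 2)  /* Canceling initiated.  */
    #define CANCELED_BITMASK    (1 << 3)  /* Thread canceled.  */
    #define EXITING_BITMASK     (1 << 4)  /* Thread is exiting.  */
    #define TERMINATED_BITMASK  (1 << 5)  /* Thread terminated, TCB freed.  */

    /* Cancellation may act only if it is enabled, pending, and the thread
       is not already on its way out (same predicate as in descr.h).  */
    static bool
    cancel_enabled_and_canceled (int value)
    {
      return (value & (CANCELSTATE_BITMASK | CANCELED_BITMASK
                       | EXITING_BITMASK | TERMINATED_BITMASK))
             == CANCELED_BITMASK;
    }

    /* pthread_cancel sends SIGCANCEL only when the target additionally
       has the asynchronous type bit set; otherwise a thread that disabled
       cancellation and is blocked in poll or select would observe a
       spurious EINTR from the internal signal.  */
    static bool
    may_send_sigcancel (int value)
    {
      return cancel_enabled_and_canceled (value)
             && (value & CANCELTYPE_BITMASK) != 0;
    }

    int
    main (void)
    {
      /* Cancellation disabled: the signal must not be sent.  */
      printf ("%d\n", may_send_sigcancel (CANCELSTATE_BITMASK
                                          | CANCELED_BITMASK));  /* 0 */
      /* Enabled and asynchronous: the signal may be sent.  */
      printf ("%d\n", may_send_sigcancel (CANCELTYPE_BITMASK
                                          | CANCELED_BITMASK));  /* 1 */
      return 0;
    }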
+ + [1] https://man7.org/linux/man-pages/man7/signal.7.html + + Reviewed-by: Florian Weimer + Tested-by: Aurelien Jarno + + (cherry-picked from commit 404656009b459658138ed1bd18f3c6cf3863e6a6) + +diff --git a/manual/process.texi b/manual/process.texi +index 28c9531f4294f56e..9307379194c6f666 100644 +--- a/manual/process.texi ++++ b/manual/process.texi +@@ -68,8 +68,7 @@ until the subprogram terminates before you can do anything else. + @c CLEANUP_HANDLER @ascuplugin @ascuheap @acsmem + @c libc_cleanup_region_start @ascuplugin @ascuheap @acsmem + @c pthread_cleanup_push_defer @ascuplugin @ascuheap @acsmem +-@c __pthread_testcancel @ascuplugin @ascuheap @acsmem +-@c CANCEL_ENABLED_AND_CANCELED ok ++@c cancel_enabled_and_canceled @ascuplugin @ascuheap @acsmem + @c do_cancel @ascuplugin @ascuheap @acsmem + @c cancel_handler ok + @c kill syscall ok +diff --git a/nptl/allocatestack.c b/nptl/allocatestack.c +index 554a721f814b53c4..96101753ec2f4323 100644 +--- a/nptl/allocatestack.c ++++ b/nptl/allocatestack.c +@@ -120,8 +120,6 @@ get_cached_stack (size_t *sizep, void **memp) + + /* Cancellation handling is back to the default. */ + result->cancelhandling = 0; +- result->cancelstate = PTHREAD_CANCEL_ENABLE; +- result->canceltype = PTHREAD_CANCEL_DEFERRED; + result->cleanup = NULL; + result->setup_failed = 0; + +diff --git a/nptl/cancellation.c b/nptl/cancellation.c +index 05962784d51fb98b..e97d56f97d7a5698 100644 +--- a/nptl/cancellation.c ++++ b/nptl/cancellation.c +@@ -31,19 +31,26 @@ int + __pthread_enable_asynccancel (void) + { + struct pthread *self = THREAD_SELF; ++ int oldval = atomic_load_relaxed (&self->cancelhandling); + +- int oldval = THREAD_GETMEM (self, canceltype); +- THREAD_SETMEM (self, canceltype, PTHREAD_CANCEL_ASYNCHRONOUS); ++ while (1) ++ { ++ int newval = oldval | CANCELTYPE_BITMASK; + +- int ch = THREAD_GETMEM (self, cancelhandling); ++ if (newval == oldval) ++ break; + +- if (self->cancelstate == PTHREAD_CANCEL_ENABLE +- && (ch & CANCELED_BITMASK) +- && !(ch & EXITING_BITMASK) +- && !(ch & TERMINATED_BITMASK)) +- { +- THREAD_SETMEM (self, result, PTHREAD_CANCELED); +- __do_cancel (); ++ if (atomic_compare_exchange_weak_acquire (&self->cancelhandling, ++ &oldval, newval)) ++ { ++ if (cancel_enabled_and_canceled_and_async (newval)) ++ { ++ self->result = PTHREAD_CANCELED; ++ __do_cancel (); ++ } ++ ++ break; ++ } + } + + return oldval; +@@ -57,10 +64,29 @@ __pthread_disable_asynccancel (int oldtype) + { + /* If asynchronous cancellation was enabled before we do not have + anything to do. */ +- if (oldtype == PTHREAD_CANCEL_ASYNCHRONOUS) ++ if (oldtype & CANCELTYPE_BITMASK) + return; + + struct pthread *self = THREAD_SELF; +- self->canceltype = PTHREAD_CANCEL_DEFERRED; ++ int newval; ++ int oldval = atomic_load_relaxed (&self->cancelhandling); ++ do ++ { ++ newval = oldval & ~CANCELTYPE_BITMASK; ++ } ++ while (!atomic_compare_exchange_weak_acquire (&self->cancelhandling, ++ &oldval, newval)); ++ ++ /* We cannot return when we are being canceled. Upon return the ++ thread might be things which would have to be undone. The ++ following loop should loop until the cancellation signal is ++ delivered. 
*/ ++ while (__glibc_unlikely ((newval & (CANCELING_BITMASK | CANCELED_BITMASK)) ++ == CANCELING_BITMASK)) ++ { ++ futex_wait_simple ((unsigned int *) &self->cancelhandling, newval, ++ FUTEX_PRIVATE); ++ newval = atomic_load_relaxed (&self->cancelhandling); ++ } + } + libc_hidden_def (__pthread_disable_asynccancel) +diff --git a/nptl/cleanup_defer.c b/nptl/cleanup_defer.c +index 7e858d0df068276b..35ba40fb0247c7cc 100644 +--- a/nptl/cleanup_defer.c ++++ b/nptl/cleanup_defer.c +@@ -31,9 +31,22 @@ ___pthread_register_cancel_defer (__pthread_unwind_buf_t *buf) + ibuf->priv.data.prev = THREAD_GETMEM (self, cleanup_jmp_buf); + ibuf->priv.data.cleanup = THREAD_GETMEM (self, cleanup); + +- /* Disable asynchronous cancellation for now. */ +- ibuf->priv.data.canceltype = THREAD_GETMEM (self, canceltype); +- THREAD_SETMEM (self, canceltype, PTHREAD_CANCEL_DEFERRED); ++ int cancelhandling = atomic_load_relaxed (&self->cancelhandling); ++ if (__glibc_unlikely (cancelhandling & CANCELTYPE_BITMASK)) ++ { ++ int newval; ++ do ++ { ++ newval = cancelhandling & ~CANCELTYPE_BITMASK; ++ } ++ while (!atomic_compare_exchange_weak_acquire (&self->cancelhandling, ++ &cancelhandling, ++ newval)); ++ } ++ ++ ibuf->priv.data.canceltype = (cancelhandling & CANCELTYPE_BITMASK ++ ? PTHREAD_CANCEL_ASYNCHRONOUS ++ : PTHREAD_CANCEL_DEFERRED); + + /* Store the new cleanup handler info. */ + THREAD_SETMEM (self, cleanup_jmp_buf, (struct pthread_unwind_buf *) buf); +@@ -55,9 +68,26 @@ ___pthread_unregister_cancel_restore (__pthread_unwind_buf_t *buf) + + THREAD_SETMEM (self, cleanup_jmp_buf, ibuf->priv.data.prev); + +- THREAD_SETMEM (self, canceltype, ibuf->priv.data.canceltype); +- if (ibuf->priv.data.canceltype == PTHREAD_CANCEL_ASYNCHRONOUS) +- __pthread_testcancel (); ++ if (ibuf->priv.data.canceltype == PTHREAD_CANCEL_DEFERRED) ++ return; ++ ++ int cancelhandling = atomic_load_relaxed (&self->cancelhandling); ++ if (cancelhandling & CANCELTYPE_BITMASK) ++ { ++ int newval; ++ do ++ { ++ newval = cancelhandling | CANCELTYPE_BITMASK; ++ } ++ while (!atomic_compare_exchange_weak_acquire (&self->cancelhandling, ++ &cancelhandling, newval)); ++ ++ if (cancel_enabled_and_canceled (cancelhandling)) ++ { ++ self->result = PTHREAD_CANCELED; ++ __do_cancel (); ++ } ++ } + } + versioned_symbol (libc, ___pthread_unregister_cancel_restore, + __pthread_unregister_cancel_restore, GLIBC_2_34); +diff --git a/nptl/descr.h b/nptl/descr.h +index dabf980e29615db3..dfef9c4bda075d13 100644 +--- a/nptl/descr.h ++++ b/nptl/descr.h +@@ -280,18 +280,27 @@ struct pthread + + /* Flags determining processing of cancellation. */ + int cancelhandling; ++ /* Bit set if cancellation is disabled. */ ++#define CANCELSTATE_BIT 0 ++#define CANCELSTATE_BITMASK (1 << CANCELSTATE_BIT) ++ /* Bit set if asynchronous cancellation mode is selected. */ ++#define CANCELTYPE_BIT 1 ++#define CANCELTYPE_BITMASK (1 << CANCELTYPE_BIT) ++ /* Bit set if canceling has been initiated. */ ++#define CANCELING_BIT 2 ++#define CANCELING_BITMASK (1 << CANCELING_BIT) + /* Bit set if canceled. */ + #define CANCELED_BIT 3 +-#define CANCELED_BITMASK (0x01 << CANCELED_BIT) ++#define CANCELED_BITMASK (1 << CANCELED_BIT) + /* Bit set if thread is exiting. */ + #define EXITING_BIT 4 +-#define EXITING_BITMASK (0x01 << EXITING_BIT) ++#define EXITING_BITMASK (1 << EXITING_BIT) + /* Bit set if thread terminated and TCB is freed. 
*/ + #define TERMINATED_BIT 5 +-#define TERMINATED_BITMASK (0x01 << TERMINATED_BIT) ++#define TERMINATED_BITMASK (1 << TERMINATED_BIT) + /* Bit set if thread is supposed to change XID. */ + #define SETXID_BIT 6 +-#define SETXID_BITMASK (0x01 << SETXID_BIT) ++#define SETXID_BITMASK (1 << SETXID_BIT) + + /* Flags. Including those copied from the thread attribute. */ + int flags; +@@ -391,14 +400,6 @@ struct pthread + /* Indicates whether is a C11 thread created by thrd_creat. */ + bool c11; + +- /* Thread cancel state (PTHREAD_CANCEL_ENABLE or +- PTHREAD_CANCEL_DISABLE). */ +- unsigned char cancelstate; +- +- /* Thread cancel type (PTHREAD_CANCEL_DEFERRED or +- PTHREAD_CANCEL_ASYNCHRONOUS). */ +- unsigned char canceltype; +- + /* Used in __pthread_kill_internal to detected a thread that has + exited or is about to exit. exit_lock must only be acquired + after blocking signals. */ +@@ -418,6 +419,22 @@ struct pthread + (sizeof (struct pthread) - offsetof (struct pthread, end_padding)) + } __attribute ((aligned (TCB_ALIGNMENT))); + ++static inline bool ++cancel_enabled_and_canceled (int value) ++{ ++ return (value & (CANCELSTATE_BITMASK | CANCELED_BITMASK | EXITING_BITMASK ++ | TERMINATED_BITMASK)) ++ == CANCELED_BITMASK; ++} ++ ++static inline bool ++cancel_enabled_and_canceled_and_async (int value) ++{ ++ return ((value) & (CANCELSTATE_BITMASK | CANCELTYPE_BITMASK | CANCELED_BITMASK ++ | EXITING_BITMASK | TERMINATED_BITMASK)) ++ == (CANCELTYPE_BITMASK | CANCELED_BITMASK); ++} ++ + /* This yields the pointer that TLS support code calls the thread pointer. */ + #if TLS_TCB_AT_TP + # define TLS_TPADJ(pd) (pd) +diff --git a/nptl/libc-cleanup.c b/nptl/libc-cleanup.c +index 180d15bc9e9a8368..fccb1abe69aa693c 100644 +--- a/nptl/libc-cleanup.c ++++ b/nptl/libc-cleanup.c +@@ -27,9 +27,24 @@ __libc_cleanup_push_defer (struct _pthread_cleanup_buffer *buffer) + + buffer->__prev = THREAD_GETMEM (self, cleanup); + ++ int cancelhandling = atomic_load_relaxed (&self->cancelhandling); ++ + /* Disable asynchronous cancellation for now. */ +- buffer->__canceltype = THREAD_GETMEM (self, canceltype); +- THREAD_SETMEM (self, canceltype, PTHREAD_CANCEL_DEFERRED); ++ if (__glibc_unlikely (cancelhandling & CANCELTYPE_BITMASK)) ++ { ++ int newval; ++ do ++ { ++ newval = cancelhandling & ~CANCELTYPE_BITMASK; ++ } ++ while (!atomic_compare_exchange_weak_acquire (&self->cancelhandling, ++ &cancelhandling, ++ newval)); ++ } ++ ++ buffer->__canceltype = (cancelhandling & CANCELTYPE_BITMASK ++ ? 
PTHREAD_CANCEL_ASYNCHRONOUS ++ : PTHREAD_CANCEL_DEFERRED); + + THREAD_SETMEM (self, cleanup, buffer); + } +@@ -42,8 +57,22 @@ __libc_cleanup_pop_restore (struct _pthread_cleanup_buffer *buffer) + + THREAD_SETMEM (self, cleanup, buffer->__prev); + +- THREAD_SETMEM (self, canceltype, buffer->__canceltype); +- if (buffer->__canceltype == PTHREAD_CANCEL_ASYNCHRONOUS) +- __pthread_testcancel (); ++ int cancelhandling = atomic_load_relaxed (&self->cancelhandling); ++ if (cancelhandling & CANCELTYPE_BITMASK) ++ { ++ int newval; ++ do ++ { ++ newval = cancelhandling | CANCELTYPE_BITMASK; ++ } ++ while (!atomic_compare_exchange_weak_acquire (&self->cancelhandling, ++ &cancelhandling, newval)); ++ ++ if (cancel_enabled_and_canceled (cancelhandling)) ++ { ++ self->result = PTHREAD_CANCELED; ++ __do_cancel (); ++ } ++ } + } + libc_hidden_def (__libc_cleanup_pop_restore) +diff --git a/nptl/pthread_cancel.c b/nptl/pthread_cancel.c +index 9bac6e3b76a20312..2680b55586e035fe 100644 +--- a/nptl/pthread_cancel.c ++++ b/nptl/pthread_cancel.c +@@ -43,18 +43,29 @@ sigcancel_handler (int sig, siginfo_t *si, void *ctx) + + struct pthread *self = THREAD_SELF; + +- int ch = atomic_load_relaxed (&self->cancelhandling); +- /* Cancelation not enabled, not cancelled, or already exitting. */ +- if (self->cancelstate == PTHREAD_CANCEL_DISABLE +- || (ch & CANCELED_BITMASK) == 0 +- || (ch & EXITING_BITMASK) != 0) +- return; +- +- /* Set the return value. */ +- THREAD_SETMEM (self, result, PTHREAD_CANCELED); +- /* Make sure asynchronous cancellation is still enabled. */ +- if (self->canceltype == PTHREAD_CANCEL_ASYNCHRONOUS) +- __do_cancel (); ++ int oldval = atomic_load_relaxed (&self->cancelhandling); ++ while (1) ++ { ++ /* We are canceled now. When canceled by another thread this flag ++ is already set but if the signal is directly send (internally or ++ from another process) is has to be done here. */ ++ int newval = oldval | CANCELING_BITMASK | CANCELED_BITMASK; ++ ++ if (oldval == newval || (oldval & EXITING_BITMASK) != 0) ++ /* Already canceled or exiting. */ ++ break; ++ ++ if (atomic_compare_exchange_weak_acquire (&self->cancelhandling, ++ &oldval, newval)) ++ { ++ self->result = PTHREAD_CANCELED; ++ ++ /* Make sure asynchronous cancellation is still enabled. */ ++ if ((oldval & CANCELTYPE_BITMASK) != 0) ++ /* Run the registered destructors and terminate the thread. */ ++ __do_cancel (); ++ } ++ } + } + + int +@@ -93,29 +104,70 @@ __pthread_cancel (pthread_t th) + } + #endif + +- int oldch = atomic_fetch_or_acquire (&pd->cancelhandling, CANCELED_BITMASK); +- if ((oldch & CANCELED_BITMASK) != 0) +- return 0; +- +- if (pd == THREAD_SELF) ++ /* Some syscalls are never restarted after being interrupted by a signal ++ handler, regardless of the use of SA_RESTART (they always fail with ++ EINTR). So pthread_cancel cannot send SIGCANCEL unless the cancellation ++ is enabled and set as asynchronous (in this case the cancellation will ++ be acted in the cancellation handler instead by the syscall wrapper). ++ Otherwise the target thread is set as 'cancelling' (CANCELING_BITMASK) ++ by atomically setting 'cancelhandling' and the cancelation will be acted ++ upon on next cancellation entrypoing in the target thread. ++ ++ It also requires to atomically check if cancellation is enabled and ++ asynchronous, so both cancellation state and type are tracked on ++ 'cancelhandling'. 
*/ ++ ++ int result = 0; ++ int oldval = atomic_load_relaxed (&pd->cancelhandling); ++ int newval; ++ do + { +- /* A single-threaded process should be able to kill itself, since there +- is nothing in the POSIX specification that says that it cannot. So +- we set multiple_threads to true so that cancellation points get +- executed. */ +- THREAD_SETMEM (THREAD_SELF, header.multiple_threads, 1); ++ newval = oldval | CANCELING_BITMASK | CANCELED_BITMASK; ++ if (oldval == newval) ++ break; ++ ++ /* If the cancellation is handled asynchronously just send a ++ signal. We avoid this if possible since it's more ++ expensive. */ ++ if (cancel_enabled_and_canceled_and_async (newval)) ++ { ++ /* Mark the cancellation as "in progress". */ ++ int newval2 = oldval | CANCELING_BITMASK; ++ if (!atomic_compare_exchange_weak_acquire (&pd->cancelhandling, ++ &oldval, newval2)) ++ continue; ++ ++ if (pd == THREAD_SELF) ++ /* This is not merely an optimization: An application may ++ call pthread_cancel (pthread_self ()) without calling ++ pthread_create, so the signal handler may not have been ++ set up for a self-cancel. */ ++ { ++ pd->result = PTHREAD_CANCELED; ++ if ((newval & CANCELTYPE_BITMASK) != 0) ++ __do_cancel (); ++ } ++ else ++ /* The cancellation handler will take care of marking the ++ thread as canceled. */ ++ result = __pthread_kill_internal (th, SIGCANCEL); ++ ++ break; ++ } ++ ++ /* A single-threaded process should be able to kill itself, since ++ there is nothing in the POSIX specification that says that it ++ cannot. So we set multiple_threads to true so that cancellation ++ points get executed. */ ++ THREAD_SETMEM (THREAD_SELF, header.multiple_threads, 1); + #ifndef TLS_MULTIPLE_THREADS_IN_TCB + __libc_multiple_threads = 1; + #endif +- +- THREAD_SETMEM (pd, result, PTHREAD_CANCELED); +- if (pd->cancelstate == PTHREAD_CANCEL_ENABLE +- && pd->canceltype == PTHREAD_CANCEL_ASYNCHRONOUS) +- __do_cancel (); +- return 0; + } ++ while (!atomic_compare_exchange_weak_acquire (&pd->cancelhandling, &oldval, ++ newval)); + +- return __pthread_kill_internal (th, SIGCANCEL); ++ return result; + } + versioned_symbol (libc, __pthread_cancel, pthread_cancel, GLIBC_2_34); + +diff --git a/nptl/pthread_join_common.c b/nptl/pthread_join_common.c +index 7303069316caef13..617056ef10671607 100644 +--- a/nptl/pthread_join_common.c ++++ b/nptl/pthread_join_common.c +@@ -57,12 +57,9 @@ __pthread_clockjoin_ex (pthread_t threadid, void **thread_return, + if ((pd == self + || (self->joinid == pd + && (pd->cancelhandling +- & (CANCELED_BITMASK | EXITING_BITMASK ++ & (CANCELING_BITMASK | CANCELED_BITMASK | EXITING_BITMASK + | TERMINATED_BITMASK)) == 0)) +- && !(self->cancelstate == PTHREAD_CANCEL_ENABLE +- && (pd->cancelhandling & (CANCELED_BITMASK | EXITING_BITMASK +- | TERMINATED_BITMASK)) +- == CANCELED_BITMASK)) ++ && !cancel_enabled_and_canceled (self->cancelhandling)) + /* This is a deadlock situation. The threads are waiting for each + other to finish. Note that this is a "may" error. 
To be 100% + sure we catch this error we would have to lock the data +diff --git a/nptl/pthread_setcancelstate.c b/nptl/pthread_setcancelstate.c +index 7e2b6e4974bd58bd..cb567be5926816f1 100644 +--- a/nptl/pthread_setcancelstate.c ++++ b/nptl/pthread_setcancelstate.c +@@ -31,9 +31,29 @@ __pthread_setcancelstate (int state, int *oldstate) + + self = THREAD_SELF; + +- if (oldstate != NULL) +- *oldstate = self->cancelstate; +- self->cancelstate = state; ++ int oldval = atomic_load_relaxed (&self->cancelhandling); ++ while (1) ++ { ++ int newval = (state == PTHREAD_CANCEL_DISABLE ++ ? oldval | CANCELSTATE_BITMASK ++ : oldval & ~CANCELSTATE_BITMASK); ++ ++ if (oldstate != NULL) ++ *oldstate = ((oldval & CANCELSTATE_BITMASK) ++ ? PTHREAD_CANCEL_DISABLE : PTHREAD_CANCEL_ENABLE); ++ ++ if (oldval == newval) ++ break; ++ ++ if (atomic_compare_exchange_weak_acquire (&self->cancelhandling, ++ &oldval, newval)) ++ { ++ if (cancel_enabled_and_canceled_and_async (newval)) ++ __do_cancel (); ++ ++ break; ++ } ++ } + + return 0; + } +diff --git a/nptl/pthread_setcanceltype.c b/nptl/pthread_setcanceltype.c +index e7b24ae733dcc0f2..e08ff7b141f904f1 100644 +--- a/nptl/pthread_setcanceltype.c ++++ b/nptl/pthread_setcanceltype.c +@@ -29,11 +29,32 @@ __pthread_setcanceltype (int type, int *oldtype) + + volatile struct pthread *self = THREAD_SELF; + +- if (oldtype != NULL) +- *oldtype = self->canceltype; +- self->canceltype = type; +- if (type == PTHREAD_CANCEL_ASYNCHRONOUS) +- __pthread_testcancel (); ++ int oldval = atomic_load_relaxed (&self->cancelhandling); ++ while (1) ++ { ++ int newval = (type == PTHREAD_CANCEL_ASYNCHRONOUS ++ ? oldval | CANCELTYPE_BITMASK ++ : oldval & ~CANCELTYPE_BITMASK); ++ ++ if (oldtype != NULL) ++ *oldtype = ((oldval & CANCELTYPE_BITMASK) ++ ? PTHREAD_CANCEL_ASYNCHRONOUS : PTHREAD_CANCEL_DEFERRED); ++ ++ if (oldval == newval) ++ break; ++ ++ if (atomic_compare_exchange_weak_acquire (&self->cancelhandling, ++ &oldval, newval)) ++ { ++ if (cancel_enabled_and_canceled_and_async (newval)) ++ { ++ THREAD_SETMEM (self, result, PTHREAD_CANCELED); ++ __do_cancel (); ++ } ++ ++ break; ++ } ++ } + + return 0; + } +diff --git a/nptl/pthread_testcancel.c b/nptl/pthread_testcancel.c +index 31185d89f2ab84c6..25230215fd607e8b 100644 +--- a/nptl/pthread_testcancel.c ++++ b/nptl/pthread_testcancel.c +@@ -24,13 +24,10 @@ void + ___pthread_testcancel (void) + { + struct pthread *self = THREAD_SELF; +- int cancelhandling = THREAD_GETMEM (self, cancelhandling); +- if (self->cancelstate == PTHREAD_CANCEL_ENABLE +- && (cancelhandling & CANCELED_BITMASK) +- && !(cancelhandling & EXITING_BITMASK) +- && !(cancelhandling & TERMINATED_BITMASK)) ++ int cancelhandling = atomic_load_relaxed (&self->cancelhandling); ++ if (cancel_enabled_and_canceled (cancelhandling)) + { +- THREAD_SETMEM (self, result, PTHREAD_CANCELED); ++ self->result = PTHREAD_CANCELED; + __do_cancel (); + } + } +diff --git a/sysdeps/nptl/dl-tls_init_tp.c b/sysdeps/nptl/dl-tls_init_tp.c +index b39dfbff2c6678d5..23aa4cfc0b784dfc 100644 +--- a/sysdeps/nptl/dl-tls_init_tp.c ++++ b/sysdeps/nptl/dl-tls_init_tp.c +@@ -107,7 +107,4 @@ __tls_init_tp (void) + It will be bigger than it actually is, but for unwind.c/pt-longjmp.c + purposes this is good enough. 
*/ + THREAD_SETMEM (pd, stackblock_size, (size_t) __libc_stack_end); +- +- THREAD_SETMEM (pd, cancelstate, PTHREAD_CANCEL_ENABLE); +- THREAD_SETMEM (pd, canceltype, PTHREAD_CANCEL_DEFERRED); + } +diff --git a/sysdeps/nptl/pthreadP.h b/sysdeps/nptl/pthreadP.h +index 374657a2fd0ee19a..b968afc4c6b61b92 100644 +--- a/sysdeps/nptl/pthreadP.h ++++ b/sysdeps/nptl/pthreadP.h +@@ -276,7 +276,7 @@ __do_cancel (void) + struct pthread *self = THREAD_SELF; + + /* Make sure we get no more cancellations. */ +- THREAD_ATOMIC_BIT_SET (self, cancelhandling, EXITING_BIT); ++ atomic_bit_set (&self->cancelhandling, EXITING_BIT); + + __pthread_unwind ((__pthread_unwind_buf_t *) + THREAD_GETMEM (self, cleanup_jmp_buf)); +diff --git a/sysdeps/pthread/Makefile b/sysdeps/pthread/Makefile +index c65710169697ad95..00419c4d199df912 100644 +--- a/sysdeps/pthread/Makefile ++++ b/sysdeps/pthread/Makefile +@@ -69,6 +69,7 @@ tests += tst-cnd-basic tst-mtx-trylock tst-cnd-broadcast \ + tst-cancel12 tst-cancel13 tst-cancel14 tst-cancel15 tst-cancel16 \ + tst-cancel18 tst-cancel19 tst-cancel20 tst-cancel21 \ + tst-cancel22 tst-cancel23 tst-cancel26 tst-cancel27 tst-cancel28 \ ++ tst-cancel29 \ + tst-cleanup0 tst-cleanup1 tst-cleanup2 tst-cleanup3 \ + tst-clock1 \ + tst-cond-except \ +diff --git a/sysdeps/pthread/tst-cancel29.c b/sysdeps/pthread/tst-cancel29.c +new file mode 100644 +index 0000000000000000..4f0d99e002883be4 +--- /dev/null ++++ b/sysdeps/pthread/tst-cancel29.c +@@ -0,0 +1,207 @@ ++/* Check if a thread that disables cancellation and which call functions ++ that might be interrupted by a signal do not see the internal SIGCANCEL. ++ ++ Copyright (C) 2022 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++/* On Linux some interfaces are never restarted after being interrupted by ++ a signal handler, regardless of the use of SA_RESTART. It means that ++ if asynchronous cancellation is not enabled, the pthread_cancel can not ++ set the internal SIGCANCEL otherwise the interface might see a spurious ++ EINTR failure. */ ++ ++static pthread_barrier_t b; ++ ++/* Cleanup handling test. 
*/ ++static int cl_called; ++static void ++cl (void *arg) ++{ ++ ++cl_called; ++} ++ ++static void * ++tf_sigtimedwait (void *arg) ++{ ++ pthread_setcancelstate (PTHREAD_CANCEL_DISABLE, NULL); ++ xpthread_barrier_wait (&b); ++ ++ int r; ++ pthread_cleanup_push (cl, NULL); ++ ++ sigset_t mask; ++ sigemptyset (&mask); ++ r = sigtimedwait (&mask, NULL, &(struct timespec) { 0, 250000000 }); ++ if (r != -1) ++ return (void*) -1; ++ if (errno != EAGAIN) ++ return (void*) -2; ++ ++ pthread_cleanup_pop (0); ++ return NULL; ++} ++ ++static void * ++tf_poll (void *arg) ++{ ++ pthread_setcancelstate (PTHREAD_CANCEL_DISABLE, NULL); ++ xpthread_barrier_wait (&b); ++ ++ int r; ++ pthread_cleanup_push (cl, NULL); ++ ++ r = poll (NULL, 0, 250); ++ if (r != 0) ++ return (void*) -1; ++ ++ pthread_cleanup_pop (0); ++ return NULL; ++} ++ ++static void * ++tf_ppoll (void *arg) ++{ ++ pthread_setcancelstate (PTHREAD_CANCEL_DISABLE, NULL); ++ ++ xpthread_barrier_wait (&b); ++ ++ int r; ++ pthread_cleanup_push (cl, NULL); ++ ++ r = ppoll (NULL, 0, &(struct timespec) { 0, 250000000 }, NULL); ++ if (r != 0) ++ return (void*) -1; ++ ++ pthread_cleanup_pop (0); ++ return NULL; ++} ++ ++static void * ++tf_select (void *arg) ++{ ++ pthread_setcancelstate (PTHREAD_CANCEL_DISABLE, NULL); ++ xpthread_barrier_wait (&b); ++ ++ int r; ++ pthread_cleanup_push (cl, NULL); ++ ++ r = select (0, NULL, NULL, NULL, &(struct timeval) { 0, 250000 }); ++ if (r != 0) ++ return (void*) -1; ++ ++ pthread_cleanup_pop (0); ++ return NULL; ++} ++ ++static void * ++tf_pselect (void *arg) ++{ ++ pthread_setcancelstate (PTHREAD_CANCEL_DISABLE, NULL); ++ xpthread_barrier_wait (&b); ++ ++ int r; ++ pthread_cleanup_push (cl, NULL); ++ ++ r = pselect (0, NULL, NULL, NULL, &(struct timespec) { 0, 250000000 }, NULL); ++ if (r != 0) ++ return (void*) -1; ++ ++ pthread_cleanup_pop (0); ++ return NULL; ++} ++ ++static void * ++tf_clock_nanosleep (void *arg) ++{ ++ pthread_setcancelstate (PTHREAD_CANCEL_DISABLE, NULL); ++ xpthread_barrier_wait (&b); ++ ++ int r; ++ pthread_cleanup_push (cl, NULL); ++ ++ r = clock_nanosleep (CLOCK_REALTIME, 0, &(struct timespec) { 0, 250000000 }, ++ NULL); ++ if (r != 0) ++ return (void*) -1; ++ ++ pthread_cleanup_pop (0); ++ return NULL; ++} ++ ++struct cancel_test_t ++{ ++ const char *name; ++ void * (*cf) (void *); ++} tests[] = ++{ ++ { "sigtimedwait", tf_sigtimedwait, }, ++ { "poll", tf_poll, }, ++ { "ppoll", tf_ppoll, }, ++ { "select", tf_select, }, ++ { "pselect", tf_pselect , }, ++ { "clock_nanosleep", tf_clock_nanosleep, }, ++}; ++ ++static int ++do_test (void) ++{ ++ for (int i = 0; i < array_length (tests); i++) ++ { ++ xpthread_barrier_init (&b, NULL, 2); ++ ++ cl_called = 0; ++ ++ pthread_t th = xpthread_create (NULL, tests[i].cf, NULL); ++ ++ xpthread_barrier_wait (&b); ++ ++ struct timespec ts = { .tv_sec = 0, .tv_nsec = 100000000 }; ++ while (nanosleep (&ts, &ts) != 0) ++ continue; ++ ++ xpthread_cancel (th); ++ ++ void *status = xpthread_join (th); ++ if (status != NULL) ++ printf ("test '%s' failed: %" PRIdPTR "\n", tests[i].name, ++ (intptr_t) status); ++ TEST_VERIFY (status == NULL); ++ ++ xpthread_barrier_destroy (&b); ++ ++ TEST_COMPARE (cl_called, 0); ++ ++ printf ("in-time cancel test of '%s' successful\n", tests[i].name); ++ } ++ ++ return 0; ++} ++ ++#include diff --git a/SOURCES/glibc-upstream-2.34-164.patch b/SOURCES/glibc-upstream-2.34-164.patch new file mode 100644 index 0000000..dbe230b --- /dev/null +++ b/SOURCES/glibc-upstream-2.34-164.patch @@ -0,0 +1,23 @@ +commit 
5d8c7776343b3f1b96ef7777e4504378f23c041a +Author: Samuel Thibault +Date: Tue Apr 12 22:14:34 2022 +0200 + + hurd: Fix arbitrary error code + + ELIBBAD is Linux-specific. + + (cherry picked from commit 67ab66541dc1164540abda284645e38be90b5119) + +diff --git a/nss/nss_test_errno.c b/nss/nss_test_errno.c +index 680f8a07b97fe263..59a5c717bebd296f 100644 +--- a/nss/nss_test_errno.c ++++ b/nss/nss_test_errno.c +@@ -28,7 +28,7 @@ static void __attribute__ ((constructor)) + init (void) + { + /* An arbitrary error code which is otherwise not used. */ +- errno = ELIBBAD; ++ errno = -1009; + } + + /* Lookup functions for pwd follow that do not return any data. */ diff --git a/SOURCES/glibc-upstream-2.34-165.patch b/SOURCES/glibc-upstream-2.34-165.patch new file mode 100644 index 0000000..1bab4fc --- /dev/null +++ b/SOURCES/glibc-upstream-2.34-165.patch @@ -0,0 +1,104 @@ +commit b87b697f15d6bf7e576a2eeadc1f740172f9d013 +Author: =Joshua Kinard +Date: Mon Apr 18 09:55:08 2022 -0300 + + mips: Fix mips64n32 64 bit time_t stat support (BZ#29069) + + Add missing support initially added by 4e8521333bea6e89fcef1020 + (which missed n32 stat). + + (cherry picked from commit 78fb88827362fbd2cc8aa32892ae5b015106e25c) + +diff --git a/sysdeps/unix/sysv/linux/mips/bits/struct_stat.h b/sysdeps/unix/sysv/linux/mips/bits/struct_stat.h +index ab9f474cbc271b7c..ed5b1bc00ba52406 100644 +--- a/sysdeps/unix/sysv/linux/mips/bits/struct_stat.h ++++ b/sysdeps/unix/sysv/linux/mips/bits/struct_stat.h +@@ -131,27 +131,30 @@ struct stat64 + + struct stat + { ++# ifdef __USE_TIME_BITS64 ++# include ++# else + __dev_t st_dev; + int st_pad1[3]; /* Reserved for st_dev expansion */ +-# ifndef __USE_FILE_OFFSET64 ++# ifndef __USE_FILE_OFFSET64 + __ino_t st_ino; +-# else ++# else + __ino64_t st_ino; +-# endif ++# endif + __mode_t st_mode; + __nlink_t st_nlink; + __uid_t st_uid; + __gid_t st_gid; + __dev_t st_rdev; +-# if !defined __USE_FILE_OFFSET64 ++# if !defined __USE_FILE_OFFSET64 + unsigned int st_pad2[2]; /* Reserved for st_rdev expansion */ + __off_t st_size; + int st_pad3; +-# else ++# else + unsigned int st_pad2[3]; /* Reserved for st_rdev expansion */ + __off64_t st_size; +-# endif +-# ifdef __USE_XOPEN2K8 ++# endif ++# ifdef __USE_XOPEN2K8 + /* Nanosecond resolution timestamps are stored in a format + equivalent to 'struct timespec'. This is the type used + whenever possible but the Unix namespace rules do not allow the +@@ -161,30 +164,34 @@ struct stat + struct timespec st_atim; /* Time of last access. */ + struct timespec st_mtim; /* Time of last modification. */ + struct timespec st_ctim; /* Time of last status change. */ +-# define st_atime st_atim.tv_sec /* Backward compatibility. */ +-# define st_mtime st_mtim.tv_sec +-# define st_ctime st_ctim.tv_sec +-# else ++# define st_atime st_atim.tv_sec /* Backward compatibility. */ ++# define st_mtime st_mtim.tv_sec ++# define st_ctime st_ctim.tv_sec ++# else + __time_t st_atime; /* Time of last access. */ + unsigned long int st_atimensec; /* Nscecs of last access. */ + __time_t st_mtime; /* Time of last modification. */ + unsigned long int st_mtimensec; /* Nsecs of last modification. */ + __time_t st_ctime; /* Time of last status change. */ + unsigned long int st_ctimensec; /* Nsecs of last status change. 
*/ +-# endif ++# endif + __blksize_t st_blksize; + unsigned int st_pad4; +-# ifndef __USE_FILE_OFFSET64 ++# ifndef __USE_FILE_OFFSET64 + __blkcnt_t st_blocks; +-# else ++# else + __blkcnt64_t st_blocks; +-# endif ++# endif + int st_pad5[14]; ++# endif + }; + + #ifdef __USE_LARGEFILE64 + struct stat64 + { ++# ifdef __USE_TIME_BITS64 ++# include ++# else + __dev_t st_dev; + unsigned int st_pad1[3]; /* Reserved for st_dev expansion */ + __ino64_t st_ino; +@@ -217,6 +224,7 @@ struct stat64 + unsigned int st_pad3; + __blkcnt64_t st_blocks; + int st_pad4[14]; ++# endif /* __USE_TIME_BITS64 */ + }; + #endif + diff --git a/SOURCES/glibc-upstream-2.34-166.patch b/SOURCES/glibc-upstream-2.34-166.patch new file mode 100644 index 0000000..c2db463 --- /dev/null +++ b/SOURCES/glibc-upstream-2.34-166.patch @@ -0,0 +1,35 @@ +commit 71326f1f2fd09dafb9c34404765fb88129e94237 +Author: Adhemerval Zanella +Date: Wed Apr 20 12:01:43 2022 -0300 + + nptl: Fix pthread_cancel cancelhandling atomic operations + + The 404656009b reversion did not setup the atomic loop to set the + cancel bits correctly. The fix is essentially what pthread_cancel + did prior 26cfbb7162ad. + + Checked on x86_64-linux-gnu and aarch64-linux-gnu. + + (cherry picked from commit 62be9681677e7ce820db721c126909979382d379) + +diff --git a/nptl/pthread_cancel.c b/nptl/pthread_cancel.c +index 2680b55586e035fe..64fd183fde59907b 100644 +--- a/nptl/pthread_cancel.c ++++ b/nptl/pthread_cancel.c +@@ -122,6 +122,7 @@ __pthread_cancel (pthread_t th) + int newval; + do + { ++ again: + newval = oldval | CANCELING_BITMASK | CANCELED_BITMASK; + if (oldval == newval) + break; +@@ -135,7 +136,7 @@ __pthread_cancel (pthread_t th) + int newval2 = oldval | CANCELING_BITMASK; + if (!atomic_compare_exchange_weak_acquire (&pd->cancelhandling, + &oldval, newval2)) +- continue; ++ goto again; + + if (pd == THREAD_SELF) + /* This is not merely an optimization: An application may diff --git a/SOURCES/glibc-upstream-2.34-167.patch b/SOURCES/glibc-upstream-2.34-167.patch new file mode 100644 index 0000000..e00042d --- /dev/null +++ b/SOURCES/glibc-upstream-2.34-167.patch @@ -0,0 +1,1446 @@ +commit 3e0a91b79b409a6a4113a0fdb08221c0bb29cfce +Author: Florian Weimer +Date: Mon Apr 11 11:28:08 2022 +0200 + + scripts: Add glibcelf.py module + + Hopefully, this will lead to tests that are easier to maintain. The + current approach of parsing readelf -W output using regular expressions + is not necessarily easier than parsing the ELF data directly. + + This module is still somewhat incomplete (e.g., coverage of relocation + types and versioning information is missing), but it is sufficient to + perform basic symbol analysis or program header analysis. + + The EM_* mapping for architecture-specific constant classes (e.g., + SttX86_64) is not yet implemented. The classes are defined for the + benefit of elf/tst-glibcelf.py. 
+ + Reviewed-by: Siddhesh Poyarekar + (cherry picked from commit 30035d67728a846fa39749cd162afd278ac654c4) + +diff --git a/elf/Makefile b/elf/Makefile +index 8e2dd91c583f9a62..8afbe3f6ab259331 100644 +--- a/elf/Makefile ++++ b/elf/Makefile +@@ -1053,6 +1053,13 @@ CFLAGS-tst-prelink.c += -fno-pie + tst-prelink-no-pie = yes + endif + ++tests-special += $(objpfx)tst-glibcelf.out ++$(objpfx)tst-glibcelf.out: tst-glibcelf.py elf.h $(..)/scripts/glibcelf.py \ ++ $(..)/scripts/glibcextract.py ++ PYTHONPATH=$(..)scripts $(PYTHON) tst-glibcelf.py \ ++ --cc="$(CC) $(patsubst -DMODULE_NAME=%,-DMODULE_NAME=testsuite,$(CPPFLAGS))" \ ++ < /dev/null > $@ 2>&1; $(evaluate-test) ++ + # The test requires shared _and_ PIE because the executable + # unit test driver must be able to link with the shared object + # that is going to eventually go into an installed DSO. +diff --git a/elf/tst-glibcelf.py b/elf/tst-glibcelf.py +new file mode 100644 +index 0000000000000000..bf15a3bad4479e08 +--- /dev/null ++++ b/elf/tst-glibcelf.py +@@ -0,0 +1,260 @@ ++#!/usr/bin/python3 ++# Verify scripts/glibcelf.py contents against elf/elf.h. ++# Copyright (C) 2022 Free Software Foundation, Inc. ++# This file is part of the GNU C Library. ++# ++# The GNU C Library is free software; you can redistribute it and/or ++# modify it under the terms of the GNU Lesser General Public ++# License as published by the Free Software Foundation; either ++# version 2.1 of the License, or (at your option) any later version. ++# ++# The GNU C Library is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++# Lesser General Public License for more details. ++# ++# You should have received a copy of the GNU Lesser General Public ++# License along with the GNU C Library; if not, see ++# . ++ ++import argparse ++import enum ++import sys ++ ++import glibcelf ++import glibcextract ++ ++errors_encountered = 0 ++ ++def error(message): ++ global errors_encountered ++ sys.stdout.write('error: {}\n'.format(message)) ++ errors_encountered += 1 ++ ++# The enum constants in glibcelf are expected to have exactly these ++# prefixes. ++expected_constant_prefixes = tuple( ++ 'ELFCLASS ELFDATA EM_ ET_ DT_ PF_ PT_ SHF_ SHN_ SHT_ STB_ STT_'.split()) ++ ++def find_constant_prefix(name): ++ """Returns a matching prefix from expected_constant_prefixes or None.""" ++ for prefix in expected_constant_prefixes: ++ if name.startswith(prefix): ++ return prefix ++ return None ++ ++def find_enum_types(): ++ """A generator for OpenIntEnum and IntFlag classes in glibcelf.""" ++ for obj in vars(glibcelf).values(): ++ if isinstance(obj, type) and obj.__bases__[0] in ( ++ glibcelf._OpenIntEnum, enum.Enum, enum.IntFlag): ++ yield obj ++ ++def check_duplicates(): ++ """Verifies that enum types do not have duplicate values. ++ ++ Different types must have different member names, too. 
++ ++ """ ++ global_seen = {} ++ for typ in find_enum_types(): ++ seen = {} ++ last = None ++ for (name, e) in typ.__members__.items(): ++ if e.value in seen: ++ error('{} has {}={} and {}={}'.format( ++ typ, seen[e.value], e.value, name, e.value)) ++ last = e ++ else: ++ seen[e.value] = name ++ if last is not None and last.value > e.value: ++ error('{} has {}={} after {}={}'.format( ++ typ, name, e.value, last.name, last.value)) ++ if name in global_seen: ++ error('{} used in {} and {}'.format( ++ name, global_seen[name], typ)) ++ else: ++ global_seen[name] = typ ++ ++def check_constant_prefixes(): ++ """Check that the constant prefixes match expected_constant_prefixes.""" ++ seen = set() ++ for typ in find_enum_types(): ++ typ_prefix = None ++ for val in typ: ++ prefix = find_constant_prefix(val.name) ++ if prefix is None: ++ error('constant {!r} for {} has unknown prefix'.format( ++ val, typ)) ++ break ++ elif typ_prefix is None: ++ typ_prefix = prefix ++ seen.add(typ_prefix) ++ elif prefix != typ_prefix: ++ error('prefix {!r} for constant {!r}, expected {!r}'.format( ++ prefix, val, typ_prefix)) ++ if typ_prefix is None: ++ error('empty enum type {}'.format(typ)) ++ ++ for prefix in sorted(set(expected_constant_prefixes) - seen): ++ error('missing constant prefix {!r}'.format(prefix)) ++ # Reverse difference is already covered inside the loop. ++ ++def find_elf_h_constants(cc): ++ """Returns a dictionary of relevant constants from .""" ++ return glibcextract.compute_macro_consts( ++ source_text='#include ', ++ cc=cc, ++ macro_re='|'.join( ++ prefix + '.*' for prefix in expected_constant_prefixes)) ++ ++# The first part of the pair is a name of an constant that is ++# dropped from glibcelf. The second part is the constant as it is ++# used in . ++glibcelf_skipped_aliases = ( ++ ('EM_ARC_A5', 'EM_ARC_COMPACT'), ++ ('PF_PARISC_SBP', 'PF_HP_SBP') ++) ++ ++# Constants that provide little value and are not included in ++# glibcelf: *LO*/*HI* range constants, *NUM constants counting the ++# number of constants. Also includes the alias names from ++# glibcelf_skipped_aliases. 
++glibcelf_skipped_constants = frozenset( ++ [e[0] for e in glibcelf_skipped_aliases]) | frozenset(""" ++DT_AARCH64_NUM ++DT_ADDRNUM ++DT_ADDRRNGHI ++DT_ADDRRNGLO ++DT_ALPHA_NUM ++DT_ENCODING ++DT_EXTRANUM ++DT_HIOS ++DT_HIPROC ++DT_IA_64_NUM ++DT_LOOS ++DT_LOPROC ++DT_MIPS_NUM ++DT_NUM ++DT_PPC64_NUM ++DT_PPC_NUM ++DT_PROCNUM ++DT_SPARC_NUM ++DT_VALNUM ++DT_VALRNGHI ++DT_VALRNGLO ++DT_VERSIONTAGNUM ++ELFCLASSNUM ++ELFDATANUM ++ET_HIOS ++ET_HIPROC ++ET_LOOS ++ET_LOPROC ++ET_NUM ++PF_MASKOS ++PF_MASKPROC ++PT_HIOS ++PT_HIPROC ++PT_HISUNW ++PT_LOOS ++PT_LOPROC ++PT_LOSUNW ++SHF_MASKOS ++SHF_MASKPROC ++SHN_HIOS ++SHN_HIPROC ++SHN_HIRESERVE ++SHN_LOOS ++SHN_LOPROC ++SHN_LORESERVE ++SHT_HIOS ++SHT_HIPROC ++SHT_HIPROC ++SHT_HISUNW ++SHT_HIUSER ++SHT_LOOS ++SHT_LOPROC ++SHT_LOSUNW ++SHT_LOUSER ++SHT_NUM ++STB_HIOS ++STB_HIPROC ++STB_LOOS ++STB_LOPROC ++STB_NUM ++STT_HIOS ++STT_HIPROC ++STT_LOOS ++STT_LOPROC ++STT_NUM ++""".strip().split()) ++ ++def check_constant_values(cc): ++ """Checks the values of constants against glibcelf.""" ++ ++ glibcelf_constants = { ++ e.name: e for typ in find_enum_types() for e in typ} ++ elf_h_constants = find_elf_h_constants(cc=cc) ++ ++ missing_in_glibcelf = (set(elf_h_constants) - set(glibcelf_constants) ++ - glibcelf_skipped_constants) ++ for name in sorted(missing_in_glibcelf): ++ error('constant {} is missing from glibcelf'.format(name)) ++ ++ unexpected_in_glibcelf = \ ++ set(glibcelf_constants) & glibcelf_skipped_constants ++ for name in sorted(unexpected_in_glibcelf): ++ error('constant {} is supposed to be filtered from glibcelf'.format( ++ name)) ++ ++ missing_in_elf_h = set(glibcelf_constants) - set(elf_h_constants) ++ for name in sorted(missing_in_elf_h): ++ error('constant {} is missing from '.format(name)) ++ ++ expected_in_elf_h = glibcelf_skipped_constants - set(elf_h_constants) ++ for name in expected_in_elf_h: ++ error('filtered constant {} is missing from '.format(name)) ++ ++ for alias_name, name_in_glibcelf in glibcelf_skipped_aliases: ++ if name_in_glibcelf not in glibcelf_constants: ++ error('alias value {} for {} not in glibcelf'.format( ++ name_in_glibcelf, alias_name)) ++ elif (int(elf_h_constants[alias_name]) ++ != glibcelf_constants[name_in_glibcelf].value): ++ error(' has {}={}, glibcelf has {}={}'.format( ++ alias_name, elf_h_constants[alias_name], ++ name_in_glibcelf, glibcelf_constants[name_in_glibcelf])) ++ ++ # Check for value mismatches: ++ for name in sorted(set(glibcelf_constants) & set(elf_h_constants)): ++ glibcelf_value = glibcelf_constants[name].value ++ elf_h_value = int(elf_h_constants[name]) ++ # On 32-bit architectures as some constants that are ++ # parsed as signed, while they are unsigned in glibcelf. So ++ # far, this only affects some flag constants, so special-case ++ # them here. 
++ if (glibcelf_value != elf_h_value ++ and not (isinstance(glibcelf_constants[name], enum.IntFlag) ++ and glibcelf_value == 1 << 31 ++ and elf_h_value == -(1 << 31))): ++ error('{}: glibcelf has {!r}, has {!r}'.format( ++ name, glibcelf_value, elf_h_value)) ++ ++def main(): ++ """The main entry point.""" ++ parser = argparse.ArgumentParser( ++ description="Check glibcelf.py and elf.h against each other.") ++ parser.add_argument('--cc', metavar='CC', ++ help='C compiler (including options) to use') ++ args = parser.parse_args() ++ ++ check_duplicates() ++ check_constant_prefixes() ++ check_constant_values(cc=args.cc) ++ ++ if errors_encountered > 0: ++ print("note: errors encountered:", errors_encountered) ++ sys.exit(1) ++ ++if __name__ == '__main__': ++ main() +diff --git a/scripts/glibcelf.py b/scripts/glibcelf.py +new file mode 100644 +index 0000000000000000..8f7d0ca184845714 +--- /dev/null ++++ b/scripts/glibcelf.py +@@ -0,0 +1,1135 @@ ++#!/usr/bin/python3 ++# ELF support functionality for Python. ++# Copyright (C) 2022 Free Software Foundation, Inc. ++# This file is part of the GNU C Library. ++# ++# The GNU C Library is free software; you can redistribute it and/or ++# modify it under the terms of the GNU Lesser General Public ++# License as published by the Free Software Foundation; either ++# version 2.1 of the License, or (at your option) any later version. ++# ++# The GNU C Library is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++# Lesser General Public License for more details. ++# ++# You should have received a copy of the GNU Lesser General Public ++# License along with the GNU C Library; if not, see ++# . ++ ++"""Basic ELF parser. ++ ++Use Image.readfile(path) to read an ELF file into memory and begin ++parsing it. ++ ++""" ++ ++import collections ++import enum ++import struct ++ ++class _OpenIntEnum(enum.IntEnum): ++ """Integer enumeration that supports arbitrary int values.""" ++ @classmethod ++ def _missing_(cls, value): ++ # See enum.IntFlag._create_pseudo_member_. This allows ++ # creating of enum constants with arbitrary integer values. ++ pseudo_member = int.__new__(cls, value) ++ pseudo_member._name_ = None ++ pseudo_member._value_ = value ++ return pseudo_member ++ ++ def __repr__(self): ++ name = self._name_ ++ if name is not None: ++ # The names have prefixes like SHT_, implying their type. ++ return name ++ return '{}({})'.format(self.__class__.__name__, self._value_) ++ ++ def __str__(self): ++ name = self._name_ ++ if name is not None: ++ return name ++ return str(self._value_) ++ ++class ElfClass(_OpenIntEnum): ++ """ELF word size. Type of EI_CLASS values.""" ++ ELFCLASSNONE = 0 ++ ELFCLASS32 = 1 ++ ELFCLASS64 = 2 ++ ++class ElfData(_OpenIntEnum): ++ """ELF endianess. Type of EI_DATA values.""" ++ ELFDATANONE = 0 ++ ELFDATA2LSB = 1 ++ ELFDATA2MSB = 2 ++ ++class Machine(_OpenIntEnum): ++ """ELF machine type. 
Type of values in Ehdr.e_machine field.""" ++ EM_NONE = 0 ++ EM_M32 = 1 ++ EM_SPARC = 2 ++ EM_386 = 3 ++ EM_68K = 4 ++ EM_88K = 5 ++ EM_IAMCU = 6 ++ EM_860 = 7 ++ EM_MIPS = 8 ++ EM_S370 = 9 ++ EM_MIPS_RS3_LE = 10 ++ EM_PARISC = 15 ++ EM_VPP500 = 17 ++ EM_SPARC32PLUS = 18 ++ EM_960 = 19 ++ EM_PPC = 20 ++ EM_PPC64 = 21 ++ EM_S390 = 22 ++ EM_SPU = 23 ++ EM_V800 = 36 ++ EM_FR20 = 37 ++ EM_RH32 = 38 ++ EM_RCE = 39 ++ EM_ARM = 40 ++ EM_FAKE_ALPHA = 41 ++ EM_SH = 42 ++ EM_SPARCV9 = 43 ++ EM_TRICORE = 44 ++ EM_ARC = 45 ++ EM_H8_300 = 46 ++ EM_H8_300H = 47 ++ EM_H8S = 48 ++ EM_H8_500 = 49 ++ EM_IA_64 = 50 ++ EM_MIPS_X = 51 ++ EM_COLDFIRE = 52 ++ EM_68HC12 = 53 ++ EM_MMA = 54 ++ EM_PCP = 55 ++ EM_NCPU = 56 ++ EM_NDR1 = 57 ++ EM_STARCORE = 58 ++ EM_ME16 = 59 ++ EM_ST100 = 60 ++ EM_TINYJ = 61 ++ EM_X86_64 = 62 ++ EM_PDSP = 63 ++ EM_PDP10 = 64 ++ EM_PDP11 = 65 ++ EM_FX66 = 66 ++ EM_ST9PLUS = 67 ++ EM_ST7 = 68 ++ EM_68HC16 = 69 ++ EM_68HC11 = 70 ++ EM_68HC08 = 71 ++ EM_68HC05 = 72 ++ EM_SVX = 73 ++ EM_ST19 = 74 ++ EM_VAX = 75 ++ EM_CRIS = 76 ++ EM_JAVELIN = 77 ++ EM_FIREPATH = 78 ++ EM_ZSP = 79 ++ EM_MMIX = 80 ++ EM_HUANY = 81 ++ EM_PRISM = 82 ++ EM_AVR = 83 ++ EM_FR30 = 84 ++ EM_D10V = 85 ++ EM_D30V = 86 ++ EM_V850 = 87 ++ EM_M32R = 88 ++ EM_MN10300 = 89 ++ EM_MN10200 = 90 ++ EM_PJ = 91 ++ EM_OPENRISC = 92 ++ EM_ARC_COMPACT = 93 ++ EM_XTENSA = 94 ++ EM_VIDEOCORE = 95 ++ EM_TMM_GPP = 96 ++ EM_NS32K = 97 ++ EM_TPC = 98 ++ EM_SNP1K = 99 ++ EM_ST200 = 100 ++ EM_IP2K = 101 ++ EM_MAX = 102 ++ EM_CR = 103 ++ EM_F2MC16 = 104 ++ EM_MSP430 = 105 ++ EM_BLACKFIN = 106 ++ EM_SE_C33 = 107 ++ EM_SEP = 108 ++ EM_ARCA = 109 ++ EM_UNICORE = 110 ++ EM_EXCESS = 111 ++ EM_DXP = 112 ++ EM_ALTERA_NIOS2 = 113 ++ EM_CRX = 114 ++ EM_XGATE = 115 ++ EM_C166 = 116 ++ EM_M16C = 117 ++ EM_DSPIC30F = 118 ++ EM_CE = 119 ++ EM_M32C = 120 ++ EM_TSK3000 = 131 ++ EM_RS08 = 132 ++ EM_SHARC = 133 ++ EM_ECOG2 = 134 ++ EM_SCORE7 = 135 ++ EM_DSP24 = 136 ++ EM_VIDEOCORE3 = 137 ++ EM_LATTICEMICO32 = 138 ++ EM_SE_C17 = 139 ++ EM_TI_C6000 = 140 ++ EM_TI_C2000 = 141 ++ EM_TI_C5500 = 142 ++ EM_TI_ARP32 = 143 ++ EM_TI_PRU = 144 ++ EM_MMDSP_PLUS = 160 ++ EM_CYPRESS_M8C = 161 ++ EM_R32C = 162 ++ EM_TRIMEDIA = 163 ++ EM_QDSP6 = 164 ++ EM_8051 = 165 ++ EM_STXP7X = 166 ++ EM_NDS32 = 167 ++ EM_ECOG1X = 168 ++ EM_MAXQ30 = 169 ++ EM_XIMO16 = 170 ++ EM_MANIK = 171 ++ EM_CRAYNV2 = 172 ++ EM_RX = 173 ++ EM_METAG = 174 ++ EM_MCST_ELBRUS = 175 ++ EM_ECOG16 = 176 ++ EM_CR16 = 177 ++ EM_ETPU = 178 ++ EM_SLE9X = 179 ++ EM_L10M = 180 ++ EM_K10M = 181 ++ EM_AARCH64 = 183 ++ EM_AVR32 = 185 ++ EM_STM8 = 186 ++ EM_TILE64 = 187 ++ EM_TILEPRO = 188 ++ EM_MICROBLAZE = 189 ++ EM_CUDA = 190 ++ EM_TILEGX = 191 ++ EM_CLOUDSHIELD = 192 ++ EM_COREA_1ST = 193 ++ EM_COREA_2ND = 194 ++ EM_ARCV2 = 195 ++ EM_OPEN8 = 196 ++ EM_RL78 = 197 ++ EM_VIDEOCORE5 = 198 ++ EM_78KOR = 199 ++ EM_56800EX = 200 ++ EM_BA1 = 201 ++ EM_BA2 = 202 ++ EM_XCORE = 203 ++ EM_MCHP_PIC = 204 ++ EM_INTELGT = 205 ++ EM_KM32 = 210 ++ EM_KMX32 = 211 ++ EM_EMX16 = 212 ++ EM_EMX8 = 213 ++ EM_KVARC = 214 ++ EM_CDP = 215 ++ EM_COGE = 216 ++ EM_COOL = 217 ++ EM_NORC = 218 ++ EM_CSR_KALIMBA = 219 ++ EM_Z80 = 220 ++ EM_VISIUM = 221 ++ EM_FT32 = 222 ++ EM_MOXIE = 223 ++ EM_AMDGPU = 224 ++ EM_RISCV = 243 ++ EM_BPF = 247 ++ EM_CSKY = 252 ++ EM_NUM = 253 ++ EM_ALPHA = 0x9026 ++ ++class Et(_OpenIntEnum): ++ """ELF file type. 
Type of ET_* values and the Ehdr.e_type field.""" ++ ET_NONE = 0 ++ ET_REL = 1 ++ ET_EXEC = 2 ++ ET_DYN = 3 ++ ET_CORE = 4 ++ ++class Shn(_OpenIntEnum): ++ """ELF reserved section indices.""" ++ SHN_UNDEF = 0 ++ SHN_BEFORE = 0xff00 ++ SHN_AFTER = 0xff01 ++ SHN_ABS = 0xfff1 ++ SHN_COMMON = 0xfff2 ++ SHN_XINDEX = 0xffff ++ ++class ShnMIPS(enum.Enum): ++ """Supplemental SHN_* constants for EM_MIPS.""" ++ SHN_MIPS_ACOMMON = 0xff00 ++ SHN_MIPS_TEXT = 0xff01 ++ SHN_MIPS_DATA = 0xff02 ++ SHN_MIPS_SCOMMON = 0xff03 ++ SHN_MIPS_SUNDEFINED = 0xff04 ++ ++class ShnPARISC(enum.Enum): ++ """Supplemental SHN_* constants for EM_PARISC.""" ++ SHN_PARISC_ANSI_COMMON = 0xff00 ++ SHN_PARISC_HUGE_COMMON = 0xff01 ++ ++class Sht(_OpenIntEnum): ++ """ELF section types. Type of SHT_* values.""" ++ SHT_NULL = 0 ++ SHT_PROGBITS = 1 ++ SHT_SYMTAB = 2 ++ SHT_STRTAB = 3 ++ SHT_RELA = 4 ++ SHT_HASH = 5 ++ SHT_DYNAMIC = 6 ++ SHT_NOTE = 7 ++ SHT_NOBITS = 8 ++ SHT_REL = 9 ++ SHT_SHLIB = 10 ++ SHT_DYNSYM = 11 ++ SHT_INIT_ARRAY = 14 ++ SHT_FINI_ARRAY = 15 ++ SHT_PREINIT_ARRAY = 16 ++ SHT_GROUP = 17 ++ SHT_SYMTAB_SHNDX = 18 ++ SHT_GNU_ATTRIBUTES = 0x6ffffff5 ++ SHT_GNU_HASH = 0x6ffffff6 ++ SHT_GNU_LIBLIST = 0x6ffffff7 ++ SHT_CHECKSUM = 0x6ffffff8 ++ SHT_SUNW_move = 0x6ffffffa ++ SHT_SUNW_COMDAT = 0x6ffffffb ++ SHT_SUNW_syminfo = 0x6ffffffc ++ SHT_GNU_verdef = 0x6ffffffd ++ SHT_GNU_verneed = 0x6ffffffe ++ SHT_GNU_versym = 0x6fffffff ++ ++class ShtALPHA(enum.Enum): ++ """Supplemental SHT_* constants for EM_ALPHA.""" ++ SHT_ALPHA_DEBUG = 0x70000001 ++ SHT_ALPHA_REGINFO = 0x70000002 ++ ++class ShtARM(enum.Enum): ++ """Supplemental SHT_* constants for EM_ARM.""" ++ SHT_ARM_EXIDX = 0x70000001 ++ SHT_ARM_PREEMPTMAP = 0x70000002 ++ SHT_ARM_ATTRIBUTES = 0x70000003 ++ ++class ShtCSKY(enum.Enum): ++ """Supplemental SHT_* constants for EM_CSKY.""" ++ SHT_CSKY_ATTRIBUTES = 0x70000001 ++ ++class ShtIA_64(enum.Enum): ++ """Supplemental SHT_* constants for EM_IA_64.""" ++ SHT_IA_64_EXT = 0x70000000 ++ SHT_IA_64_UNWIND = 0x70000001 ++ ++class ShtMIPS(enum.Enum): ++ """Supplemental SHT_* constants for EM_MIPS.""" ++ SHT_MIPS_LIBLIST = 0x70000000 ++ SHT_MIPS_MSYM = 0x70000001 ++ SHT_MIPS_CONFLICT = 0x70000002 ++ SHT_MIPS_GPTAB = 0x70000003 ++ SHT_MIPS_UCODE = 0x70000004 ++ SHT_MIPS_DEBUG = 0x70000005 ++ SHT_MIPS_REGINFO = 0x70000006 ++ SHT_MIPS_PACKAGE = 0x70000007 ++ SHT_MIPS_PACKSYM = 0x70000008 ++ SHT_MIPS_RELD = 0x70000009 ++ SHT_MIPS_IFACE = 0x7000000b ++ SHT_MIPS_CONTENT = 0x7000000c ++ SHT_MIPS_OPTIONS = 0x7000000d ++ SHT_MIPS_SHDR = 0x70000010 ++ SHT_MIPS_FDESC = 0x70000011 ++ SHT_MIPS_EXTSYM = 0x70000012 ++ SHT_MIPS_DENSE = 0x70000013 ++ SHT_MIPS_PDESC = 0x70000014 ++ SHT_MIPS_LOCSYM = 0x70000015 ++ SHT_MIPS_AUXSYM = 0x70000016 ++ SHT_MIPS_OPTSYM = 0x70000017 ++ SHT_MIPS_LOCSTR = 0x70000018 ++ SHT_MIPS_LINE = 0x70000019 ++ SHT_MIPS_RFDESC = 0x7000001a ++ SHT_MIPS_DELTASYM = 0x7000001b ++ SHT_MIPS_DELTAINST = 0x7000001c ++ SHT_MIPS_DELTACLASS = 0x7000001d ++ SHT_MIPS_DWARF = 0x7000001e ++ SHT_MIPS_DELTADECL = 0x7000001f ++ SHT_MIPS_SYMBOL_LIB = 0x70000020 ++ SHT_MIPS_EVENTS = 0x70000021 ++ SHT_MIPS_TRANSLATE = 0x70000022 ++ SHT_MIPS_PIXIE = 0x70000023 ++ SHT_MIPS_XLATE = 0x70000024 ++ SHT_MIPS_XLATE_DEBUG = 0x70000025 ++ SHT_MIPS_WHIRL = 0x70000026 ++ SHT_MIPS_EH_REGION = 0x70000027 ++ SHT_MIPS_XLATE_OLD = 0x70000028 ++ SHT_MIPS_PDR_EXCEPTION = 0x70000029 ++ SHT_MIPS_XHASH = 0x7000002b ++ ++class ShtPARISC(enum.Enum): ++ """Supplemental SHT_* constants for EM_PARISC.""" ++ SHT_PARISC_EXT = 0x70000000 ++ SHT_PARISC_UNWIND = 0x70000001 ++ 
SHT_PARISC_DOC = 0x70000002
++
++class Pf(enum.IntFlag):
++    """Program header flags.  Type of Phdr.p_flags values."""
++    PF_X = 1
++    PF_W = 2
++    PF_R = 4
++
++class PfARM(enum.IntFlag):
++    """Supplemental PF_* flags for EM_ARM."""
++    PF_ARM_SB = 0x10000000
++    PF_ARM_PI = 0x20000000
++    PF_ARM_ABS = 0x40000000
++
++class PfPARISC(enum.IntFlag):
++    """Supplemental PF_* flags for EM_PARISC."""
++    PF_HP_PAGE_SIZE = 0x00100000
++    PF_HP_FAR_SHARED = 0x00200000
++    PF_HP_NEAR_SHARED = 0x00400000
++    PF_HP_CODE = 0x01000000
++    PF_HP_MODIFY = 0x02000000
++    PF_HP_LAZYSWAP = 0x04000000
++    PF_HP_SBP = 0x08000000
++
++class PfIA_64(enum.IntFlag):
++    """Supplemental PF_* flags for EM_IA_64."""
++    PF_IA_64_NORECOV = 0x80000000
++
++class PfMIPS(enum.IntFlag):
++    """Supplemental PF_* flags for EM_MIPS."""
++    PF_MIPS_LOCAL = 0x10000000
++
++class Shf(enum.IntFlag):
++    """Section flags.  Type of Shdr.sh_flags values."""
++    SHF_WRITE = 1 << 0
++    SHF_ALLOC = 1 << 1
++    SHF_EXECINSTR = 1 << 2
++    SHF_MERGE = 1 << 4
++    SHF_STRINGS = 1 << 5
++    SHF_INFO_LINK = 1 << 6
++    SHF_LINK_ORDER = 1 << 7
++    SHF_OS_NONCONFORMING = 1 << 8
++    SHF_GROUP = 1 << 9
++    SHF_TLS = 1 << 10
++    SHF_COMPRESSED = 1 << 11
++    SHF_GNU_RETAIN = 1 << 21
++    SHF_ORDERED = 1 << 30
++    SHF_EXCLUDE = 1 << 31
++
++class ShfALPHA(enum.IntFlag):
++    """Supplemental SHF_* constants for EM_ALPHA."""
++    SHF_ALPHA_GPREL = 0x10000000
++
++class ShfARM(enum.IntFlag):
++    """Supplemental SHF_* constants for EM_ARM."""
++    SHF_ARM_ENTRYSECT = 0x10000000
++    SHF_ARM_COMDEF = 0x80000000
++
++class ShfIA_64(enum.IntFlag):
++    """Supplemental SHF_* constants for EM_IA_64."""
++    SHF_IA_64_SHORT = 0x10000000
++    SHF_IA_64_NORECOV = 0x20000000
++
++class ShfMIPS(enum.IntFlag):
++    """Supplemental SHF_* constants for EM_MIPS."""
++    SHF_MIPS_GPREL = 0x10000000
++    SHF_MIPS_MERGE = 0x20000000
++    SHF_MIPS_ADDR = 0x40000000
++    SHF_MIPS_STRINGS = 0x80000000
++    SHF_MIPS_NOSTRIP = 0x08000000
++    SHF_MIPS_LOCAL = 0x04000000
++    SHF_MIPS_NAMES = 0x02000000
++    SHF_MIPS_NODUPE = 0x01000000
++
++class ShfPARISC(enum.IntFlag):
++    """Supplemental SHF_* constants for EM_PARISC."""
++    SHF_PARISC_SHORT = 0x20000000
++    SHF_PARISC_HUGE = 0x40000000
++    SHF_PARISC_SBP = 0x80000000
++
++class Stb(_OpenIntEnum):
++    """ELF symbol binding type."""
++    STB_LOCAL = 0
++    STB_GLOBAL = 1
++    STB_WEAK = 2
++    STB_GNU_UNIQUE = 10
++    STB_MIPS_SPLIT_COMMON = 13
++
++class Stt(_OpenIntEnum):
++    """ELF symbol type."""
++    STT_NOTYPE = 0
++    STT_OBJECT = 1
++    STT_FUNC = 2
++    STT_SECTION = 3
++    STT_FILE = 4
++    STT_COMMON = 5
++    STT_TLS = 6
++    STT_GNU_IFUNC = 10
++
++class SttARM(enum.Enum):
++    """Supplemental STT_* constants for EM_ARM."""
++    STT_ARM_TFUNC = 13
++    STT_ARM_16BIT = 15
++
++class SttPARISC(enum.Enum):
++    """Supplemental STT_* constants for EM_PARISC."""
++    STT_HP_OPAQUE = 11
++    STT_HP_STUB = 12
++    STT_PARISC_MILLICODE = 13
++
++class SttSPARC(enum.Enum):
++    """Supplemental STT_* constants for EM_SPARC."""
++    STT_SPARC_REGISTER = 13
++
++class SttX86_64(enum.Enum):
++    """Supplemental STT_* constants for EM_X86_64."""
++    SHT_X86_64_UNWIND = 0x70000001
++
++class Pt(_OpenIntEnum):
++    """ELF program header types.
Type of Phdr.p_type.""" ++ PT_NULL = 0 ++ PT_LOAD = 1 ++ PT_DYNAMIC = 2 ++ PT_INTERP = 3 ++ PT_NOTE = 4 ++ PT_SHLIB = 5 ++ PT_PHDR = 6 ++ PT_TLS = 7 ++ PT_NUM = 8 ++ PT_GNU_EH_FRAME = 0x6474e550 ++ PT_GNU_STACK = 0x6474e551 ++ PT_GNU_RELRO = 0x6474e552 ++ PT_GNU_PROPERTY = 0x6474e553 ++ PT_SUNWBSS = 0x6ffffffa ++ PT_SUNWSTACK = 0x6ffffffb ++ ++class PtARM(enum.Enum): ++ """Supplemental PT_* constants for EM_ARM.""" ++ PT_ARM_EXIDX = 0x70000001 ++ ++class PtIA_64(enum.Enum): ++ """Supplemental PT_* constants for EM_IA_64.""" ++ PT_IA_64_HP_OPT_ANOT = 0x60000012 ++ PT_IA_64_HP_HSL_ANOT = 0x60000013 ++ PT_IA_64_HP_STACK = 0x60000014 ++ PT_IA_64_ARCHEXT = 0x70000000 ++ PT_IA_64_UNWIND = 0x70000001 ++ ++class PtMIPS(enum.Enum): ++ """Supplemental PT_* constants for EM_MIPS.""" ++ PT_MIPS_REGINFO = 0x70000000 ++ PT_MIPS_RTPROC = 0x70000001 ++ PT_MIPS_OPTIONS = 0x70000002 ++ PT_MIPS_ABIFLAGS = 0x70000003 ++ ++class PtPARISC(enum.Enum): ++ """Supplemental PT_* constants for EM_PARISC.""" ++ PT_HP_TLS = 0x60000000 ++ PT_HP_CORE_NONE = 0x60000001 ++ PT_HP_CORE_VERSION = 0x60000002 ++ PT_HP_CORE_KERNEL = 0x60000003 ++ PT_HP_CORE_COMM = 0x60000004 ++ PT_HP_CORE_PROC = 0x60000005 ++ PT_HP_CORE_LOADABLE = 0x60000006 ++ PT_HP_CORE_STACK = 0x60000007 ++ PT_HP_CORE_SHM = 0x60000008 ++ PT_HP_CORE_MMF = 0x60000009 ++ PT_HP_PARALLEL = 0x60000010 ++ PT_HP_FASTBIND = 0x60000011 ++ PT_HP_OPT_ANNOT = 0x60000012 ++ PT_HP_HSL_ANNOT = 0x60000013 ++ PT_HP_STACK = 0x60000014 ++ PT_PARISC_ARCHEXT = 0x70000000 ++ PT_PARISC_UNWIND = 0x70000001 ++ ++class Dt(_OpenIntEnum): ++ """ELF dynamic segment tags. Type of Dyn.d_val.""" ++ DT_NULL = 0 ++ DT_NEEDED = 1 ++ DT_PLTRELSZ = 2 ++ DT_PLTGOT = 3 ++ DT_HASH = 4 ++ DT_STRTAB = 5 ++ DT_SYMTAB = 6 ++ DT_RELA = 7 ++ DT_RELASZ = 8 ++ DT_RELAENT = 9 ++ DT_STRSZ = 10 ++ DT_SYMENT = 11 ++ DT_INIT = 12 ++ DT_FINI = 13 ++ DT_SONAME = 14 ++ DT_RPATH = 15 ++ DT_SYMBOLIC = 16 ++ DT_REL = 17 ++ DT_RELSZ = 18 ++ DT_RELENT = 19 ++ DT_PLTREL = 20 ++ DT_DEBUG = 21 ++ DT_TEXTREL = 22 ++ DT_JMPREL = 23 ++ DT_BIND_NOW = 24 ++ DT_INIT_ARRAY = 25 ++ DT_FINI_ARRAY = 26 ++ DT_INIT_ARRAYSZ = 27 ++ DT_FINI_ARRAYSZ = 28 ++ DT_RUNPATH = 29 ++ DT_FLAGS = 30 ++ DT_PREINIT_ARRAY = 32 ++ DT_PREINIT_ARRAYSZ = 33 ++ DT_SYMTAB_SHNDX = 34 ++ DT_GNU_PRELINKED = 0x6ffffdf5 ++ DT_GNU_CONFLICTSZ = 0x6ffffdf6 ++ DT_GNU_LIBLISTSZ = 0x6ffffdf7 ++ DT_CHECKSUM = 0x6ffffdf8 ++ DT_PLTPADSZ = 0x6ffffdf9 ++ DT_MOVEENT = 0x6ffffdfa ++ DT_MOVESZ = 0x6ffffdfb ++ DT_FEATURE_1 = 0x6ffffdfc ++ DT_POSFLAG_1 = 0x6ffffdfd ++ DT_SYMINSZ = 0x6ffffdfe ++ DT_SYMINENT = 0x6ffffdff ++ DT_GNU_HASH = 0x6ffffef5 ++ DT_TLSDESC_PLT = 0x6ffffef6 ++ DT_TLSDESC_GOT = 0x6ffffef7 ++ DT_GNU_CONFLICT = 0x6ffffef8 ++ DT_GNU_LIBLIST = 0x6ffffef9 ++ DT_CONFIG = 0x6ffffefa ++ DT_DEPAUDIT = 0x6ffffefb ++ DT_AUDIT = 0x6ffffefc ++ DT_PLTPAD = 0x6ffffefd ++ DT_MOVETAB = 0x6ffffefe ++ DT_SYMINFO = 0x6ffffeff ++ DT_VERSYM = 0x6ffffff0 ++ DT_RELACOUNT = 0x6ffffff9 ++ DT_RELCOUNT = 0x6ffffffa ++ DT_FLAGS_1 = 0x6ffffffb ++ DT_VERDEF = 0x6ffffffc ++ DT_VERDEFNUM = 0x6ffffffd ++ DT_VERNEED = 0x6ffffffe ++ DT_VERNEEDNUM = 0x6fffffff ++ DT_AUXILIARY = 0x7ffffffd ++ DT_FILTER = 0x7fffffff ++ ++class DtAARCH64(enum.Enum): ++ """Supplemental DT_* constants for EM_AARCH64.""" ++ DT_AARCH64_BTI_PLT = 0x70000001 ++ DT_AARCH64_PAC_PLT = 0x70000003 ++ DT_AARCH64_VARIANT_PCS = 0x70000005 ++ ++class DtALPHA(enum.Enum): ++ """Supplemental DT_* constants for EM_ALPHA.""" ++ DT_ALPHA_PLTRO = 0x70000000 ++ ++class DtALTERA_NIOS2(enum.Enum): ++ """Supplemental DT_* constants for 
EM_ALTERA_NIOS2."""
++    DT_NIOS2_GP = 0x70000002
++
++class DtIA_64(enum.Enum):
++    """Supplemental DT_* constants for EM_IA_64."""
++    DT_IA_64_PLT_RESERVE = 0x70000000
++
++class DtMIPS(enum.Enum):
++    """Supplemental DT_* constants for EM_MIPS."""
++    DT_MIPS_RLD_VERSION = 0x70000001
++    DT_MIPS_TIME_STAMP = 0x70000002
++    DT_MIPS_ICHECKSUM = 0x70000003
++    DT_MIPS_IVERSION = 0x70000004
++    DT_MIPS_FLAGS = 0x70000005
++    DT_MIPS_BASE_ADDRESS = 0x70000006
++    DT_MIPS_MSYM = 0x70000007
++    DT_MIPS_CONFLICT = 0x70000008
++    DT_MIPS_LIBLIST = 0x70000009
++    DT_MIPS_LOCAL_GOTNO = 0x7000000a
++    DT_MIPS_CONFLICTNO = 0x7000000b
++    DT_MIPS_LIBLISTNO = 0x70000010
++    DT_MIPS_SYMTABNO = 0x70000011
++    DT_MIPS_UNREFEXTNO = 0x70000012
++    DT_MIPS_GOTSYM = 0x70000013
++    DT_MIPS_HIPAGENO = 0x70000014
++    DT_MIPS_RLD_MAP = 0x70000016
++    DT_MIPS_DELTA_CLASS = 0x70000017
++    DT_MIPS_DELTA_CLASS_NO = 0x70000018
++    DT_MIPS_DELTA_INSTANCE = 0x70000019
++    DT_MIPS_DELTA_INSTANCE_NO = 0x7000001a
++    DT_MIPS_DELTA_RELOC = 0x7000001b
++    DT_MIPS_DELTA_RELOC_NO = 0x7000001c
++    DT_MIPS_DELTA_SYM = 0x7000001d
++    DT_MIPS_DELTA_SYM_NO = 0x7000001e
++    DT_MIPS_DELTA_CLASSSYM = 0x70000020
++    DT_MIPS_DELTA_CLASSSYM_NO = 0x70000021
++    DT_MIPS_CXX_FLAGS = 0x70000022
++    DT_MIPS_PIXIE_INIT = 0x70000023
++    DT_MIPS_SYMBOL_LIB = 0x70000024
++    DT_MIPS_LOCALPAGE_GOTIDX = 0x70000025
++    DT_MIPS_LOCAL_GOTIDX = 0x70000026
++    DT_MIPS_HIDDEN_GOTIDX = 0x70000027
++    DT_MIPS_PROTECTED_GOTIDX = 0x70000028
++    DT_MIPS_OPTIONS = 0x70000029
++    DT_MIPS_INTERFACE = 0x7000002a
++    DT_MIPS_DYNSTR_ALIGN = 0x7000002b
++    DT_MIPS_INTERFACE_SIZE = 0x7000002c
++    DT_MIPS_RLD_TEXT_RESOLVE_ADDR = 0x7000002d
++    DT_MIPS_PERF_SUFFIX = 0x7000002e
++    DT_MIPS_COMPACT_SIZE = 0x7000002f
++    DT_MIPS_GP_VALUE = 0x70000030
++    DT_MIPS_AUX_DYNAMIC = 0x70000031
++    DT_MIPS_PLTGOT = 0x70000032
++    DT_MIPS_RWPLT = 0x70000034
++    DT_MIPS_RLD_MAP_REL = 0x70000035
++    DT_MIPS_XHASH = 0x70000036
++
++class DtPPC(enum.Enum):
++    """Supplemental DT_* constants for EM_PPC."""
++    DT_PPC_GOT = 0x70000000
++    DT_PPC_OPT = 0x70000001
++
++class DtPPC64(enum.Enum):
++    """Supplemental DT_* constants for EM_PPC64."""
++    DT_PPC64_GLINK = 0x70000000
++    DT_PPC64_OPD = 0x70000001
++    DT_PPC64_OPDSZ = 0x70000002
++    DT_PPC64_OPT = 0x70000003
++
++class DtSPARC(enum.Enum):
++    """Supplemental DT_* constants for EM_SPARC."""
++    DT_SPARC_REGISTER = 0x70000001
++
++class StInfo:
++    """ELF symbol binding and type.  Type of the Sym.st_info field."""
++    def __init__(self, arg0, arg1=None):
++        if isinstance(arg0, int) and arg1 is None:
++            self.bind = Stb(arg0 >> 4)
++            self.type = Stt(arg0 & 15)
++        else:
++            self.bind = Stb(arg0)
++            self.type = Stt(arg1)
++
++    def value(self):
++        """Returns the raw value for the bind/type combination."""
++        return (self.bind.value << 4) | (self.type.value)
++
++# Type in an ELF file.  Used for deserialization.
++_Layout = collections.namedtuple('_Layout', 'unpack size')
++
++def _define_layouts(baseclass: type, layout32: str, layout64: str,
++                    types=None, fields32=None):
++    """Assign variants dict to baseclass.
++
++    The variants dict is indexed by (ElfClass, ElfData) pairs, and its
++    values are _Layout instances.
++
++    """
++    struct32 = struct.Struct(layout32)
++    struct64 = struct.Struct(layout64)
++
++    # Check that the struct formats yield the right number of components.
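++    # Unpacking a blob of spaces is a cheap way to obtain a tuple of
++    # the right length for the field-count comparison below.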
++    for s in (struct32, struct64):
++        example = s.unpack(b' ' * s.size)
++        if len(example) != len(baseclass._fields):
++            raise ValueError('{!r} yields wrong field count: {} != {}'.format(
++                s.format, len(example), len(baseclass._fields)))
++
++    # Check that field names in types are correct.
++    if types is None:
++        types = ()
++    for n in types:
++        if n not in baseclass._fields:
++            raise ValueError('{} does not have field {!r}'.format(
++                baseclass.__name__, n))
++
++    if fields32 is not None \
++       and set(fields32) != set(baseclass._fields):
++        raise ValueError('{!r} is not a permutation of the fields {!r}'.format(
++            fields32, baseclass._fields))
++
++    def unique_name(name, used_names = (set((baseclass.__name__,))
++                                        | set(baseclass._fields)
++                                        | {n.__name__
++                                           for n in (types or {}).values()})):
++        """Find a name that is not used for a class or field name."""
++        candidate = name
++        n = 0
++        while candidate in used_names:
++            n += 1
++            candidate = '{}{}'.format(name, n)
++        used_names.add(candidate)
++        return candidate
++
++    blob_name = unique_name('blob')
++    struct_unpack_name = unique_name('struct_unpack')
++    comps_name = unique_name('comps')
++
++    layouts = {}
++    for (bits, elfclass, layout, fields) in (
++            (32, ElfClass.ELFCLASS32, layout32, fields32),
++            (64, ElfClass.ELFCLASS64, layout64, None),
++    ):
++        for (elfdata, structprefix, funcsuffix) in (
++                (ElfData.ELFDATA2LSB, '<', 'LE'),
++                (ElfData.ELFDATA2MSB, '>', 'BE'),
++        ):
++            env = {
++                baseclass.__name__: baseclass,
++                struct_unpack_name: struct.unpack,
++            }
++
++            # Add the type converters.
++            if types:
++                for cls in types.values():
++                    env[cls.__name__] = cls
++
++            funcname = ''.join(
++                ('unpack_', baseclass.__name__, str(bits), funcsuffix))
++
++            code = '''
++def {funcname}({blob_name}):
++'''.format(funcname=funcname, blob_name=blob_name)
++
++            indent = ' ' * 4
++            unpack_call = '{}({!r}, {})'.format(
++                struct_unpack_name, structprefix + layout, blob_name)
++            field_names = ', '.join(baseclass._fields)
++            if types is None and fields is None:
++                code += '{}return {}({})\n'.format(
++                    indent, baseclass.__name__, unpack_call)
++            else:
++                # Destructuring tuple assignment.
++                if fields is None:
++                    code += '{}{} = {}\n'.format(
++                        indent, field_names, unpack_call)
++                else:
++                    # Use custom field order.
++                    code += '{}{} = {}\n'.format(
++                        indent, ', '.join(fields), unpack_call)
++
++                # Perform the type conversions.
++                for n in baseclass._fields:
++                    if n in types:
++                        code += '{}{} = {}({})\n'.format(
++                            indent, n, types[n].__name__, n)
++                # Create the named tuple.
++                code += '{}return {}({})\n'.format(
++                    indent, baseclass.__name__, field_names)
++
++            exec(code, env)
++            layouts[(elfclass, elfdata)] = _Layout(
++                env[funcname], struct.calcsize(layout))
++    baseclass.layouts = layouts
++
++
++# Corresponds to EI_* indices into Elf*_Ehdr.e_ident.
++class Ident(collections.namedtuple('Ident',
++    'ei_mag ei_class ei_data ei_version ei_osabi ei_abiversion ei_pad')):
++
++    def __new__(cls, *args):
++        """Construct an object from a blob or its constituent fields."""
++        if len(args) == 1:
++            return cls.unpack(args[0])
++        return cls.__base__.__new__(cls, *args)
++
++    @staticmethod
++    def unpack(blob: memoryview) -> 'Ident':
++        """Parse raw data into a tuple."""
++        ei_mag, ei_class, ei_data, ei_version, ei_osabi, ei_abiversion, \
++            ei_pad = struct.unpack('4s5B7s', blob)
++        return Ident(ei_mag, ElfClass(ei_class), ElfData(ei_data),
++                     ei_version, ei_osabi, ei_abiversion, ei_pad)
++    size = 16
++
++# Corresponds to Elf32_Ehdr and Elf64_Ehdr.
++Ehdr = collections.namedtuple('Ehdr',
++    'e_ident e_type e_machine e_version e_entry e_phoff e_shoff e_flags'
++    + ' e_ehsize e_phentsize e_phnum e_shentsize e_shnum e_shstrndx')
++_define_layouts(Ehdr,
++                layout32='16s2H5I6H',
++                layout64='16s2HI3QI6H',
++                types=dict(e_ident=Ident,
++                           e_machine=Machine,
++                           e_type=Et,
++                           e_shstrndx=Shn))
++
++# Corresponds to Elf32_Phdr and Elf64_Phdr.  Order follows the latter.
++Phdr = collections.namedtuple('Phdr',
++    'p_type p_flags p_offset p_vaddr p_paddr p_filesz p_memsz p_align')
++_define_layouts(Phdr,
++                layout32='8I',
++                fields32=('p_type', 'p_offset', 'p_vaddr', 'p_paddr',
++                          'p_filesz', 'p_memsz', 'p_flags', 'p_align'),
++                layout64='2I6Q',
++                types=dict(p_type=Pt, p_flags=Pf))
++
++
++# Corresponds to Elf32_Shdr and Elf64_Shdr.
++class Shdr(collections.namedtuple('Shdr',
++    'sh_name sh_type sh_flags sh_addr sh_offset sh_size sh_link sh_info'
++    + ' sh_addralign sh_entsize')):
++    def resolve(self, strtab: 'StringTable') -> 'Shdr':
++        """Resolve sh_name using a string table."""
++        return self.__class__(strtab.get(self[0]), *self[1:])
++_define_layouts(Shdr,
++                layout32='10I',
++                layout64='2I4Q2I2Q',
++                types=dict(sh_type=Sht,
++                           sh_flags=Shf,
++                           sh_link=Shn))
++
++# Corresponds to Elf32_Dyn and Elf64_Dyn.  The nesting through the
++# d_un union is skipped, and d_ptr is missing (its representation in
++# Python would be identical to d_val).
++Dyn = collections.namedtuple('Dyn', 'd_tag d_val')
++_define_layouts(Dyn,
++                layout32='2i',
++                layout64='2q',
++                types=dict(d_tag=Dt))
++
++# Corresponds to Elf32_Sym and Elf64_Sym.
++class Sym(collections.namedtuple('Sym',
++    'st_name st_info st_other st_shndx st_value st_size')):
++    def resolve(self, strtab: 'StringTable') -> 'Sym':
++        """Resolve st_name using a string table."""
++        return self.__class__(strtab.get(self[0]), *self[1:])
++_define_layouts(Sym,
++                layout32='3I2BH',
++                layout64='I2BH2Q',
++                fields32=('st_name', 'st_value', 'st_size', 'st_info',
++                          'st_other', 'st_shndx'),
++                types=dict(st_shndx=Shn,
++                           st_info=StInfo))
++
++# Corresponds to Elf32_Rel and Elf64_Rel.
++Rel = collections.namedtuple('Rel', 'r_offset r_info')
++_define_layouts(Rel,
++                layout32='2I',
++                layout64='2Q')
++
++# Corresponds to Elf32_Rela and Elf64_Rela.
++Rela = collections.namedtuple('Rela', 'r_offset r_info r_addend')
++_define_layouts(Rela,
++                layout32='3I',
++                layout64='3Q')
++
++class StringTable:
++    """ELF string table."""
++    def __init__(self, blob):
++        """Create a new string table backed by the data in the blob.
++
++        blob: a memoryview-like object
++
++        """
++        self.blob = blob
++
++    def get(self, index) -> bytes:
++        """Returns the null-terminated byte string at the index."""
++        blob = self.blob
++        endindex = index
++        while True:
++            if blob[endindex] == 0:
++                return bytes(blob[index:endindex])
++            endindex += 1
++
++class Image:
++    """ELF image parser."""
++    def __init__(self, image):
++        """Create an ELF image from binary image data.
++
++        image: a memoryview-like object that supports efficient range
++        subscripting.
++
++        """
++        self.image = image
++        ident = self.read(Ident, 0)
++        classdata = (ident.ei_class, ident.ei_data)
++        # Set self.Ehdr etc. to the subtypes with the right parsers.
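++        # A missing (ElfClass, ElfData) combination leaves the
++        # attribute None, and header parsing is skipped below.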
++        for typ in (Ehdr, Phdr, Shdr, Dyn, Sym, Rel, Rela):
++            setattr(self, typ.__name__, typ.layouts.get(classdata, None))
++
++        if self.Ehdr is not None:
++            self.ehdr = self.read(self.Ehdr, 0)
++            self._shdr_num = self._compute_shdr_num()
++        else:
++            self.ehdr = None
++            self._shdr_num = 0
++
++        self._section = {}
++        self._stringtab = {}
++
++        if self._shdr_num > 0:
++            self._shdr_strtab = self._find_shdr_strtab()
++        else:
++            self._shdr_strtab = None
++
++    @staticmethod
++    def readfile(path: str) -> 'Image':
++        """Reads the ELF file at the specified path."""
++        with open(path, 'rb') as inp:
++            return Image(memoryview(inp.read()))
++
++    def _compute_shdr_num(self) -> int:
++        """Computes the actual number of section headers."""
++        shnum = self.ehdr.e_shnum
++        if shnum == 0:
++            if self.ehdr.e_shoff == 0 or self.ehdr.e_shentsize == 0:
++                # No section headers.
++                return 0
++            # Otherwise the extension mechanism is used (which may be
++            # needed because e_shnum is just 16 bits).
++            return self.read(self.Shdr, self.ehdr.e_shoff).sh_size
++        return shnum
++
++    def _find_shdr_strtab(self) -> StringTable:
++        """Finds the section header string table (maybe via extensions)."""
++        shstrndx = self.ehdr.e_shstrndx
++        if shstrndx == Shn.SHN_XINDEX:
++            shstrndx = self.read(self.Shdr, self.ehdr.e_shoff).sh_link
++        return self._find_stringtab(shstrndx)
++
++    def read(self, typ: type, offset: int):
++        """Reads an object at a specific offset.
++
++        The type must have been enhanced using _define_layouts.
++
++        """
++        return typ.unpack(self.image[offset: offset + typ.size])
++
++    def phdrs(self) -> Phdr:
++        """Generator iterating over the program headers."""
++        if self.ehdr is None:
++            return
++        size = self.ehdr.e_phentsize
++        if size != self.Phdr.size:
++            raise ValueError('Unexpected Phdr size in ELF header: {} != {}'
++                             .format(size, self.Phdr.size))
++
++        offset = self.ehdr.e_phoff
++        for _ in range(self.ehdr.e_phnum):
++            yield self.read(self.Phdr, offset)
++            offset += size
++
++    def shdrs(self, resolve: bool=True) -> Shdr:
++        """Generator iterating over the section headers.
++
++        If resolve, section names are automatically translated
++        using the section header string table.
++
++        """
++        if self._shdr_num == 0:
++            return
++
++        size = self.ehdr.e_shentsize
++        if size != self.Shdr.size:
++            raise ValueError('Unexpected Shdr size in ELF header: {} != {}'
++                             .format(size, self.Shdr.size))
++
++        offset = self.ehdr.e_shoff
++        for _ in range(self._shdr_num):
++            shdr = self.read(self.Shdr, offset)
++            if resolve:
++                shdr = shdr.resolve(self._shdr_strtab)
++            yield shdr
++            offset += size
++
++    def dynamic(self) -> Dyn:
++        """Generator iterating over the dynamic segment."""
++        for phdr in self.phdrs():
++            if phdr.p_type == Pt.PT_DYNAMIC:
++                # Pick the first dynamic segment, like the loader.
++                if phdr.p_filesz == 0:
++                    # Probably separated debuginfo.
++                    return
++                offset = phdr.p_offset
++                end = offset + phdr.p_memsz
++                size = self.Dyn.size
++                while True:
++                    next_offset = offset + size
++                    if next_offset > end:
++                        raise ValueError(
++                            'Dynamic segment size {} is not a multiple of Dyn size {}'.format(
++                                phdr.p_memsz, size))
++                    yield self.read(self.Dyn, offset)
++                    if next_offset == end:
++                        return
++                    offset = next_offset
++
++    def syms(self, shdr: Shdr, resolve: bool=True) -> Sym:
++        """A generator iterating over a symbol table.
++
++        If resolve, symbol names are automatically translated using
++        the string table for the symbol table.
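++        The entry size recorded in the section header is checked
++        against the expected Sym size before iteration starts.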
++
++        """
++        assert shdr.sh_type == Sht.SHT_SYMTAB
++        size = shdr.sh_entsize
++        if size != self.Sym.size:
++            raise ValueError('Invalid symbol table entry size {}'.format(size))
++        offset = shdr.sh_offset
++        end = shdr.sh_offset + shdr.sh_size
++        if resolve:
++            strtab = self._find_stringtab(shdr.sh_link)
++        while offset < end:
++            sym = self.read(self.Sym, offset)
++            if resolve:
++                sym = sym.resolve(strtab)
++            yield sym
++            offset += size
++        if offset != end:
++            raise ValueError('Symbol table is not a multiple of entry size')
++
++    def lookup_string(self, strtab_index: int, strtab_offset: int) -> bytes:
++        """Looks up a string in a string table identified by its link index."""
++        try:
++            strtab = self._stringtab[strtab_index]
++        except KeyError:
++            strtab = self._find_stringtab(strtab_index)
++        return strtab.get(strtab_offset)
++
++    def find_section(self, shndx: Shn) -> Shdr:
++        """Returns the section header for the indexed section.
++
++        The section name is not resolved.
++        """
++        try:
++            return self._section[shndx]
++        except KeyError:
++            pass
++        if shndx in Shn:
++            raise ValueError('Reserved section index {}'.format(shndx))
++        idx = shndx.value
++        if idx < 0 or idx >= self._shdr_num:
++            raise ValueError('Section index {} out of range [0, {})'.format(
++                idx, self._shdr_num))
++        shdr = self.read(
++            self.Shdr, self.ehdr.e_shoff + idx * self.Shdr.size)
++        self._section[shndx] = shdr
++        return shdr
++
++    def _find_stringtab(self, sh_link: int) -> StringTable:
++        if sh_link in self._stringtab:
++            return self._stringtab[sh_link]
++        if sh_link < 0 or sh_link >= self._shdr_num:
++            raise ValueError('Section index {} out of range [0, {})'.format(
++                sh_link, self._shdr_num))
++        shdr = self.read(
++            self.Shdr, self.ehdr.e_shoff + sh_link * self.Shdr.size)
++        if shdr.sh_type != Sht.SHT_STRTAB:
++            raise ValueError(
++                'Section {} is not a string table: {}'.format(
++                    sh_link, shdr.sh_type))
++        strtab = StringTable(
++            self.image[shdr.sh_offset:shdr.sh_offset + shdr.sh_size])
++        # This could retain essentially arbitrary amounts of data,
++        # but caching string tables seems important for performance.
++        self._stringtab[sh_link] = strtab
++        return strtab
++
++
++__all__ = [name for name in dir() if name[0].isupper()]
diff --git a/SOURCES/glibc-upstream-2.34-168.patch b/SOURCES/glibc-upstream-2.34-168.patch
new file mode 100644
index 0000000..49e07b7
--- /dev/null
+++ b/SOURCES/glibc-upstream-2.34-168.patch
@@ -0,0 +1,407 @@
+commit f0c71b34f96c816292c49122d50da3a511b67bf2
+Author: Florian Weimer
+Date:   Mon Apr 11 11:30:31 2022 +0200
+
+    Default to --with-default-link=no (bug 25812)
+
+    This is necessary to place the libio vtables into the RELRO segment.
+    New tests elf/tst-relro-ldso and elf/tst-relro-libc are added to
+    verify that this is what actually happens.
+
+    The new tests fail on ia64 due to lack of (default) RELRO support
+    in binutils, so they are XFAILed there.
+
+    (cherry picked from commit 198abcbb94618730dae1b3f4393efaa49e0ec8c7)
+
+diff --git a/INSTALL b/INSTALL
+index d8d4e9f155f56616..60d01568d77645c7 100644
+--- a/INSTALL
++++ b/INSTALL
+@@ -90,6 +90,12 @@ if 'CFLAGS' is specified it must enable optimization.  For example:
+      library will still be usable, but functionality may be lost--for
+      example, you can't build a shared libc with old binutils.
+ 
++'--with-default-link=FLAG'
++     With '--with-default-link=yes', the build system does not use a
++     custom linker script for linking shared objects.
The default for ++ FLAG is the opposite, 'no', because the custom linker script is ++ needed for full RELRO protection. ++ + '--with-nonshared-cflags=CFLAGS' + Use additional compiler flags CFLAGS to build the parts of the + library which are always statically linked into applications and +diff --git a/configure b/configure +index 03f4e59e754b5463..34c64f8de44e3086 100755 +--- a/configure ++++ b/configure +@@ -3373,7 +3373,7 @@ fi + if test "${with_default_link+set}" = set; then : + withval=$with_default_link; use_default_link=$withval + else +- use_default_link=default ++ use_default_link=no + fi + + +@@ -6085,69 +6085,6 @@ fi + $as_echo "$libc_cv_hashstyle" >&6; } + + +-# The linker's default -shared behavior is good enough if it +-# does these things that our custom linker scripts ensure that +-# all allocated NOTE sections come first. +-if test "$use_default_link" = default; then +- { $as_echo "$as_me:${as_lineno-$LINENO}: checking for sufficient default -shared layout" >&5 +-$as_echo_n "checking for sufficient default -shared layout... " >&6; } +-if ${libc_cv_use_default_link+:} false; then : +- $as_echo_n "(cached) " >&6 +-else +- libc_cv_use_default_link=no +- cat > conftest.s <<\EOF +- .section .note.a,"a",%note +- .balign 4 +- .long 4,4,9 +- .string "GNU" +- .string "foo" +- .section .note.b,"a",%note +- .balign 4 +- .long 4,4,9 +- .string "GNU" +- .string "bar" +-EOF +- if { ac_try=' ${CC-cc} $ASFLAGS -shared -o conftest.so conftest.s 1>&5' +- { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5 +- (eval $ac_try) 2>&5 +- ac_status=$? +- $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 +- test $ac_status = 0; }; } && +- ac_try=`$READELF -S conftest.so | sed -n \ +- '${x;p;} +- s/^ *\[ *[1-9][0-9]*\] *\([^ ][^ ]*\) *\([^ ][^ ]*\) .*$/\2 \1/ +- t a +- b +- : a +- H'` +- then +- libc_seen_a=no libc_seen_b=no +- set -- $ac_try +- while test $# -ge 2 -a "$1" = NOTE; do +- case "$2" in +- .note.a) libc_seen_a=yes ;; +- .note.b) libc_seen_b=yes ;; +- esac +- shift 2 +- done +- case "$libc_seen_a$libc_seen_b" in +- yesyes) +- libc_cv_use_default_link=yes +- ;; +- *) +- echo >&5 "\ +-$libc_seen_a$libc_seen_b from: +-$ac_try" +- ;; +- esac +- fi +- rm -f conftest* +-fi +-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $libc_cv_use_default_link" >&5 +-$as_echo "$libc_cv_use_default_link" >&6; } +- use_default_link=$libc_cv_use_default_link +-fi +- + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for GLOB_DAT reloc" >&5 + $as_echo_n "checking for GLOB_DAT reloc... " >&6; } + if ${libc_cv_has_glob_dat+:} false; then : +diff --git a/configure.ac b/configure.ac +index eb9431875fae1b0e..2c69af0807266e7e 100644 +--- a/configure.ac ++++ b/configure.ac +@@ -153,7 +153,7 @@ AC_ARG_WITH([default-link], + AS_HELP_STRING([--with-default-link], + [do not use explicit linker scripts]), + [use_default_link=$withval], +- [use_default_link=default]) ++ [use_default_link=no]) + + dnl Additional build flags injection. + AC_ARG_WITH([nonshared-cflags], +@@ -1378,59 +1378,6 @@ fi + rm -f conftest*]) + AC_SUBST(libc_cv_hashstyle) + +-# The linker's default -shared behavior is good enough if it +-# does these things that our custom linker scripts ensure that +-# all allocated NOTE sections come first. 
+-if test "$use_default_link" = default; then
+-  AC_CACHE_CHECK([for sufficient default -shared layout],
+-		  libc_cv_use_default_link, [dnl
+-  libc_cv_use_default_link=no
+-  cat > conftest.s <<\EOF
+-	.section .note.a,"a",%note
+-	.balign 4
+-	.long 4,4,9
+-	.string "GNU"
+-	.string "foo"
+-	.section .note.b,"a",%note
+-	.balign 4
+-	.long 4,4,9
+-	.string "GNU"
+-	.string "bar"
+-EOF
+-  if AC_TRY_COMMAND([dnl
+-  ${CC-cc} $ASFLAGS -shared -o conftest.so conftest.s 1>&AS_MESSAGE_LOG_FD]) &&
+-     ac_try=`$READELF -S conftest.so | sed -n \
+-	 ['${x;p;}
+-	  s/^ *\[ *[1-9][0-9]*\] *\([^ ][^ ]*\) *\([^ ][^ ]*\) .*$/\2 \1/
+-	  t a
+-	  b
+-	  : a
+-	  H']`
+-  then
+-    libc_seen_a=no libc_seen_b=no
+-    set -- $ac_try
+-    while test $# -ge 2 -a "$1" = NOTE; do
+-      case "$2" in
+-      .note.a) libc_seen_a=yes ;;
+-      .note.b) libc_seen_b=yes ;;
+-      esac
+-      shift 2
+-    done
+-    case "$libc_seen_a$libc_seen_b" in
+-    yesyes)
+-      libc_cv_use_default_link=yes
+-      ;;
+-    *)
+-      echo >&AS_MESSAGE_LOG_FD "\
+-$libc_seen_a$libc_seen_b from:
+-$ac_try"
+-      ;;
+-    esac
+-  fi
+-  rm -f conftest*])
+-  use_default_link=$libc_cv_use_default_link
+-fi
+-
+ AC_CACHE_CHECK(for GLOB_DAT reloc,
+ libc_cv_has_glob_dat, [dnl
+ cat > conftest.c < $@ 2>&1; $(evaluate-test)
++# The optional symbols are present in libc only if the architecture has
++# the GLIBC_2.0 symbol set in libc.
++$(objpfx)tst-relro-libc.out: tst-relro-symbols.py $(..)/scripts/glibcelf.py \
++  $(common-objpfx)libc.so
++	$(PYTHON) tst-relro-symbols.py $(common-objpfx)libc.so \
++	    --required=_IO_cookie_jumps \
++	    --required=_IO_file_jumps \
++	    --required=_IO_file_jumps_maybe_mmap \
++	    --required=_IO_file_jumps_mmap \
++	    --required=_IO_helper_jumps \
++	    --required=_IO_mem_jumps \
++	    --required=_IO_obstack_jumps \
++	    --required=_IO_proc_jumps \
++	    --required=_IO_str_chk_jumps \
++	    --required=_IO_str_jumps \
++	    --required=_IO_strn_jumps \
++	    --required=_IO_wfile_jumps \
++	    --required=_IO_wfile_jumps_maybe_mmap \
++	    --required=_IO_wfile_jumps_mmap \
++	    --required=_IO_wmem_jumps \
++	    --required=_IO_wstr_jumps \
++	    --required=_IO_wstrn_jumps \
++	    --optional=_IO_old_cookie_jumps \
++	    --optional=_IO_old_file_jumps \
++	    --optional=_IO_old_proc_jumps \
++	    > $@ 2>&1; $(evaluate-test)
++
+ tests += $(tests-execstack-$(have-z-execstack))
+ ifeq ($(run-built-tests),yes)
+ tests-special += \
+diff --git a/elf/tst-relro-symbols.py b/elf/tst-relro-symbols.py
+new file mode 100644
+index 0000000000000000..368ea3349f86bd81
+--- /dev/null
++++ b/elf/tst-relro-symbols.py
+@@ -0,0 +1,137 @@
++#!/usr/bin/python3
++# Verify that certain symbols are covered by RELRO.
++# Copyright (C) 2022 Free Software Foundation, Inc.
++# This file is part of the GNU C Library.
++#
++# The GNU C Library is free software; you can redistribute it and/or
++# modify it under the terms of the GNU Lesser General Public
++# License as published by the Free Software Foundation; either
++# version 2.1 of the License, or (at your option) any later version.
++#
++# The GNU C Library is distributed in the hope that it will be useful,
++# but WITHOUT ANY WARRANTY; without even the implied warranty of
++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++# Lesser General Public License for more details.
++#
++# You should have received a copy of the GNU Lesser General Public
++# License along with the GNU C Library; if not, see
++# <https://www.gnu.org/licenses/>.
++
++"""Analyze a (shared) object to verify that certain symbols are
++present and covered by the PT_GNU_RELRO segment.
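++Symbol names are supplied on the command line via --required and
++--optional.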
++
++"""
++
++import argparse
++import os.path
++import sys
++
++# Make available glibc Python modules.
++sys.path.append(os.path.join(
++    os.path.dirname(os.path.realpath(__file__)), os.path.pardir, 'scripts'))
++
++import glibcelf
++
++def find_relro(path: str, img: glibcelf.Image) -> (int, int):
++    """Discover the address range of the PT_GNU_RELRO segment."""
++    for phdr in img.phdrs():
++        if phdr.p_type == glibcelf.Pt.PT_GNU_RELRO:
++            # The computation is not entirely accurate because
++            # _dl_protect_relro in elf/dl-reloc.c rounds both the
++            # start and the end downwards using the run-time page size.
++            return phdr.p_vaddr, phdr.p_vaddr + phdr.p_memsz
++    sys.stdout.write('{}: error: no PT_GNU_RELRO segment\n'.format(path))
++    sys.exit(1)
++
++def check_in_relro(kind, relro_begin, relro_end, name, start, size, error):
++    """Check if a section or symbol falls within the RELRO segment."""
++    end = start + size - 1
++    if not (relro_begin <= start < end < relro_end):
++        error(
++            '{} {!r} of size {} at 0x{:x} is not in RELRO range [0x{:x}, 0x{:x})'.format(
++                kind, name.decode('UTF-8'), size, start,
++                relro_begin, relro_end))
++
++def get_parser():
++    """Return an argument parser for this script."""
++    parser = argparse.ArgumentParser(description=__doc__)
++    parser.add_argument('object', help='path to object file to check')
++    parser.add_argument('--required', metavar='NAME', default=(),
++                        help='required symbol names', nargs='*')
++    parser.add_argument('--optional', metavar='NAME', default=(),
++                        help='optional symbol names', nargs='*')
++    return parser
++
++def main(argv):
++    """The main entry point."""
++    parser = get_parser()
++    opts = parser.parse_args(argv)
++    img = glibcelf.Image.readfile(opts.object)
++
++    required_symbols = frozenset([sym.encode('UTF-8')
++                                  for sym in opts.required])
++    optional_symbols = frozenset([sym.encode('UTF-8')
++                                  for sym in opts.optional])
++    check_symbols = required_symbols | optional_symbols
++
++    # Tracks the symbols in check_symbols that have been found.
++    symbols_found = set()
++
++    # Discover the extent of the RELRO segment.
++    relro_begin, relro_end = find_relro(opts.object, img)
++    symbol_table_found = False
++
++    errors = False
++    def error(msg: str) -> None:
++        """Record an error condition and write a message to standard output."""
++        nonlocal errors
++        errors = True
++        sys.stdout.write('{}: error: {}\n'.format(opts.object, msg))
++
++    # Iterate over section headers to find the symbol table.
++    for shdr in img.shdrs():
++        if shdr.sh_type == glibcelf.Sht.SHT_SYMTAB:
++            symbol_table_found = True
++            for sym in img.syms(shdr):
++                if sym.st_name in check_symbols:
++                    symbols_found.add(sym.st_name)
++
++                    # Validate symbol type, section, and size.
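++                    # Only a data object in a real section with a
++                    # nonzero size can be range-checked against RELRO.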
++ if sym.st_info.type != glibcelf.Stt.STT_OBJECT: ++ error('symbol {!r} has wrong type {}'.format( ++ sym.st_name.decode('UTF-8'), sym.st_info.type)) ++ if sym.st_shndx in glibcelf.Shn: ++ error('symbol {!r} has reserved section {}'.format( ++ sym.st_name.decode('UTF-8'), sym.st_shndx)) ++ continue ++ if sym.st_size == 0: ++ error('symbol {!r} has size zero'.format( ++ sym.st_name.decode('UTF-8'))) ++ continue ++ ++ check_in_relro('symbol', relro_begin, relro_end, ++ sym.st_name, sym.st_value, sym.st_size, ++ error) ++ continue # SHT_SYMTAB ++ if shdr.sh_name == b'.data.rel.ro' \ ++ or shdr.sh_name.startswith(b'.data.rel.ro.'): ++ check_in_relro('section', relro_begin, relro_end, ++ shdr.sh_name, shdr.sh_addr, shdr.sh_size, ++ error) ++ continue ++ ++ if required_symbols - symbols_found: ++ for sym in sorted(required_symbols - symbols_found): ++ error('symbol {!r} not found'.format(sym.decode('UTF-8'))) ++ ++ if errors: ++ sys.exit(1) ++ ++ if not symbol_table_found: ++ sys.stdout.write( ++ '{}: warning: no symbol table found (stripped object)\n'.format( ++ opts.object)) ++ sys.exit(77) ++ ++if __name__ == '__main__': ++ main(sys.argv[1:]) +diff --git a/manual/install.texi b/manual/install.texi +index 816b77a0a25a88a7..36a5af62bc5722b0 100644 +--- a/manual/install.texi ++++ b/manual/install.texi +@@ -117,6 +117,12 @@ problem and suppress these constructs, so that the library will still be + usable, but functionality may be lost---for example, you can't build a + shared libc with old binutils. + ++@item --with-default-link=@var{FLAG} ++With @code{--with-default-link=yes}, the build system does not use a ++custom linker script for linking shared objects. The default for ++@var{FLAG} is the opposite, @samp{no}, because the custom linker script ++is needed for full RELRO protection. ++ + @item --with-nonshared-cflags=@var{cflags} + Use additional compiler flags @var{cflags} to build the parts of the + library which are always statically linked into applications and +diff --git a/sysdeps/unix/sysv/linux/ia64/Makefile b/sysdeps/unix/sysv/linux/ia64/Makefile +index da85ba43e2d0ddef..c5cc41b3677d4a2a 100644 +--- a/sysdeps/unix/sysv/linux/ia64/Makefile ++++ b/sysdeps/unix/sysv/linux/ia64/Makefile +@@ -1,3 +1,9 @@ ++ifeq ($(subdir),elf) ++# ia64 does not support PT_GNU_RELRO. ++test-xfail-tst-relro-ldso = yes ++test-xfail-tst-relro-libc = yes ++endif ++ + ifeq ($(subdir),misc) + sysdep_headers += sys/rse.h + endif diff --git a/SOURCES/glibc-upstream-2.34-169.patch b/SOURCES/glibc-upstream-2.34-169.patch new file mode 100644 index 0000000..63cb452 --- /dev/null +++ b/SOURCES/glibc-upstream-2.34-169.patch @@ -0,0 +1,87 @@ +commit ca0faa140ff8cebe4c041d935f0f5eb480873d99 +Author: Joan Bruguera +Date: Mon Apr 11 19:49:56 2022 +0200 + + misc: Fix rare fortify crash on wchar funcs. [BZ 29030] + + If `__glibc_objsize (__o) == (size_t) -1` (i.e. `__o` is unknown size), fortify + checks should pass, and `__whatever_alias` should be called. + + Previously, `__glibc_objsize (__o) == (size_t) -1` was explicitly checked, but + on commit a643f60c53876b, this was moved into `__glibc_safe_or_unknown_len`. + + A comment says the -1 case should work as: "The -1 check is redundant because + since it implies that __glibc_safe_len_cond is true.". But this fails when: + * `__s > 1` + * `__osz == -1` (i.e. 
unknown size at compile time)
+    * `__l` is big enough
+    * `__l * __s <= __osz` can be folded to a constant
+      (I only found this to be true for `mbsrtowcs` and other functions in wchar2.h)
+
+    In this case `__l * __s <= __osz` is false, and `__whatever_chk_warn` will be
+    called by `__glibc_fortify` or `__glibc_fortify_n` and crash the program.
+
+    This commit adds the explicit `__osz == -1` check again.
+    moc crashes on startup due to this, see: https://bugs.archlinux.org/task/74041
+
+    Minimal test case (test.c):
+        #include <wchar.h>
+
+        int main (void)
+        {
+            const char *hw = "HelloWorld";
+            mbsrtowcs (NULL, &hw, (size_t)-1, NULL);
+            return 0;
+        }
+
+    Build with:
+        gcc -O2 -Wp,-D_FORTIFY_SOURCE=2 test.c -o test && ./test
+
+    Output:
+        *** buffer overflow detected ***: terminated
+
+    Fixes: BZ #29030
+    Signed-off-by: Joan Bruguera
+    Signed-off-by: Siddhesh Poyarekar
+    (cherry picked from commit 33e03f9cd2be4f2cd62f93fda539cc07d9c8130e)
+
+diff --git a/debug/tst-fortify.c b/debug/tst-fortify.c
+index 8b5902423cf0ad88..fb02452f5993c594 100644
+--- a/debug/tst-fortify.c
++++ b/debug/tst-fortify.c
+@@ -1505,6 +1505,11 @@ do_test (void)
+   CHK_FAIL_END
+ #endif
+ 
++  /* Bug 29030 regression check */
++  cp = "HelloWorld";
++  if (mbsrtowcs (NULL, &cp, (size_t)-1, &s) != 10)
++    FAIL ();
++
+   cp = "A";
+   if (mbstowcs (wenough, cp, 10) != 1
+       || wcscmp (wenough, L"A") != 0)
+diff --git a/misc/sys/cdefs.h b/misc/sys/cdefs.h
+index 515fb681a0547217..b36013b9a6b4d9c3 100644
+--- a/misc/sys/cdefs.h
++++ b/misc/sys/cdefs.h
+@@ -161,13 +161,13 @@
+        || (__builtin_constant_p (__l) && (__l) > 0))
+ 
+ /* Length is known to be safe at compile time if the __L * __S <= __OBJSZ
+-   condition can be folded to a constant and if it is true.  The -1 check is
+-   redundant because since it implies that __glibc_safe_len_cond is true.  */
++   condition can be folded to a constant and if it is true, or unknown (-1)  */
+ #define __glibc_safe_or_unknown_len(__l, __s, __osz) \
+-  (__glibc_unsigned_or_positive (__l)				      \
+-   && __builtin_constant_p (__glibc_safe_len_cond ((__SIZE_TYPE__) (__l),    \
+-						   __s, __osz))		      \
+-   && __glibc_safe_len_cond ((__SIZE_TYPE__) (__l), __s, __osz))
++  ((__osz) == (__SIZE_TYPE__) -1					      \
++   || (__glibc_unsigned_or_positive (__l)				      \
++       && __builtin_constant_p (__glibc_safe_len_cond ((__SIZE_TYPE__) (__l), \
++						       (__s), (__osz)))	      \
++       && __glibc_safe_len_cond ((__SIZE_TYPE__) (__l), (__s), (__osz))))
+ 
+ /* Conversely, we know at compile time that the length is unsafe if the
+    __L * __S <= __OBJSZ condition can be folded to a constant and if it is
diff --git a/SOURCES/glibc-upstream-2.34-170.patch b/SOURCES/glibc-upstream-2.34-170.patch
new file mode 100644
index 0000000..11aa68c
--- /dev/null
+++ b/SOURCES/glibc-upstream-2.34-170.patch
@@ -0,0 +1,49 @@
+commit 0d477e92c49db2906b32e44135b98746ccc73c7b
+Author: Florian Weimer
+Date:   Tue Apr 26 14:22:10 2022 +0200
+
+    INSTALL: Rephrase --with-default-link documentation
+
+    Reviewed-by: Carlos O'Donell
+    (cherry picked from commit c935789bdf40ba22b5698da869d3a4789797e09f)
+
+diff --git a/INSTALL b/INSTALL
+index 60d01568d77645c7..10a3dcdc0a8db665 100644
+--- a/INSTALL
++++ b/INSTALL
+@@ -90,10 +90,10 @@ if 'CFLAGS' is specified it must enable optimization.  For example:
+      library will still be usable, but functionality may be lost--for
+      example, you can't build a shared libc with old binutils.
+ 
+-'--with-default-link=FLAG'
+-     With '--with-default-link=yes', the build system does not use a
+-     custom linker script for linking shared objects.
The default for +- FLAG is the opposite, 'no', because the custom linker script is ++'--with-default-link' ++ With '--with-default-link', the build system does not use a custom ++ linker script for linking shared objects. The default is ++ '--without-default-link', because the custom linker script is + needed for full RELRO protection. + + '--with-nonshared-cflags=CFLAGS' +diff --git a/manual/install.texi b/manual/install.texi +index 36a5af62bc5722b0..8e34ff7e1847f3ae 100644 +--- a/manual/install.texi ++++ b/manual/install.texi +@@ -117,11 +117,11 @@ problem and suppress these constructs, so that the library will still be + usable, but functionality may be lost---for example, you can't build a + shared libc with old binutils. + +-@item --with-default-link=@var{FLAG} +-With @code{--with-default-link=yes}, the build system does not use a +-custom linker script for linking shared objects. The default for +-@var{FLAG} is the opposite, @samp{no}, because the custom linker script +-is needed for full RELRO protection. ++@item --with-default-link ++With @code{--with-default-link}, the build system does not use a custom ++linker script for linking shared objects. The default is ++@code{--without-default-link}, because the custom linker script is ++needed for full RELRO protection. + + @item --with-nonshared-cflags=@var{cflags} + Use additional compiler flags @var{cflags} to build the parts of the diff --git a/SOURCES/glibc-upstream-2.34-171.patch b/SOURCES/glibc-upstream-2.34-171.patch new file mode 100644 index 0000000..04e6898 --- /dev/null +++ b/SOURCES/glibc-upstream-2.34-171.patch @@ -0,0 +1,377 @@ +commit bc56ab1f4aa937665034373d3e320d0779a839aa +Author: Florian Weimer +Date: Tue Apr 26 14:23:02 2022 +0200 + + dlfcn: Do not use rtld_active () to determine ld.so state (bug 29078) + + When audit modules are loaded, ld.so initialization is not yet + complete, and rtld_active () returns false even though ld.so is + mostly working. Instead, the static dlopen hook is used, but that + does not work at all because this is not a static dlopen situation. + + Commit 466c1ea15f461edb8e3ffaf5d86d708876343bbf ("dlfcn: Rework + static dlopen hooks") moved the hook pointer into _rtld_global_ro, + which means that separate protection is not needed anymore and the + hook pointer can be checked directly. + + The guard for disabling libio vtable hardening in _IO_vtable_check + should stay for now. + + Fixes commit 8e1472d2c1e25e6eabc2059170731365f6d5b3d1 ("ld.so: + Examine GLRO to detect inactive loader [BZ #20204]"). 
+ + Reviewed-by: Adhemerval Zanella + (cherry picked from commit 8dcb6d0af07fda3607b541857e4f3970a74ed55b) + +diff --git a/dlfcn/dladdr.c b/dlfcn/dladdr.c +index 1cc305f0c46e7c3b..0d07ae1cd4dbb7a2 100644 +--- a/dlfcn/dladdr.c ++++ b/dlfcn/dladdr.c +@@ -24,7 +24,7 @@ int + __dladdr (const void *address, Dl_info *info) + { + #ifdef SHARED +- if (!rtld_active ()) ++ if (GLRO (dl_dlfcn_hook) != NULL) + return GLRO (dl_dlfcn_hook)->dladdr (address, info); + #endif + return _dl_addr (address, info, NULL, NULL); +diff --git a/dlfcn/dladdr1.c b/dlfcn/dladdr1.c +index 78560dbac208c316..93ce68c1d6067fe2 100644 +--- a/dlfcn/dladdr1.c ++++ b/dlfcn/dladdr1.c +@@ -24,7 +24,7 @@ int + __dladdr1 (const void *address, Dl_info *info, void **extra, int flags) + { + #ifdef SHARED +- if (!rtld_active ()) ++ if (GLRO (dl_dlfcn_hook) != NULL) + return GLRO (dl_dlfcn_hook)->dladdr1 (address, info, extra, flags); + #endif + +diff --git a/dlfcn/dlclose.c b/dlfcn/dlclose.c +index 6a013a81bb648191..07ecb21bf7d43be4 100644 +--- a/dlfcn/dlclose.c ++++ b/dlfcn/dlclose.c +@@ -24,7 +24,7 @@ int + __dlclose (void *handle) + { + #ifdef SHARED +- if (!rtld_active ()) ++ if (GLRO (dl_dlfcn_hook) != NULL) + return GLRO (dl_dlfcn_hook)->dlclose (handle); + #endif + +diff --git a/dlfcn/dlerror.c b/dlfcn/dlerror.c +index 5047b140662bc33e..63da79c63000eef0 100644 +--- a/dlfcn/dlerror.c ++++ b/dlfcn/dlerror.c +@@ -32,7 +32,7 @@ char * + __dlerror (void) + { + # ifdef SHARED +- if (!rtld_active ()) ++ if (GLRO (dl_dlfcn_hook) != NULL) + return GLRO (dl_dlfcn_hook)->dlerror (); + # endif + +diff --git a/dlfcn/dlinfo.c b/dlfcn/dlinfo.c +index c6f9a1da09ff8622..47d2daa96fa5986f 100644 +--- a/dlfcn/dlinfo.c ++++ b/dlfcn/dlinfo.c +@@ -89,7 +89,7 @@ dlinfo_implementation (void *handle, int request, void *arg) + int + ___dlinfo (void *handle, int request, void *arg) + { +- if (!rtld_active ()) ++ if (GLRO (dl_dlfcn_hook) != NULL) + return GLRO (dl_dlfcn_hook)->dlinfo (handle, request, arg); + else + return dlinfo_implementation (handle, request, arg); +diff --git a/dlfcn/dlmopen.c b/dlfcn/dlmopen.c +index c171c8953da20fc7..2309224eb8484b1a 100644 +--- a/dlfcn/dlmopen.c ++++ b/dlfcn/dlmopen.c +@@ -80,7 +80,7 @@ dlmopen_implementation (Lmid_t nsid, const char *file, int mode, + void * + ___dlmopen (Lmid_t nsid, const char *file, int mode) + { +- if (!rtld_active ()) ++ if (GLRO (dl_dlfcn_hook) != NULL) + return GLRO (dl_dlfcn_hook)->dlmopen (nsid, file, mode, RETURN_ADDRESS (0)); + else + return dlmopen_implementation (nsid, file, mode, RETURN_ADDRESS (0)); +diff --git a/dlfcn/dlopen.c b/dlfcn/dlopen.c +index e04b374b82b04337..9c59c751c4eaf7a7 100644 +--- a/dlfcn/dlopen.c ++++ b/dlfcn/dlopen.c +@@ -75,7 +75,7 @@ dlopen_implementation (const char *file, int mode, void *dl_caller) + void * + ___dlopen (const char *file, int mode) + { +- if (!rtld_active ()) ++ if (GLRO (dl_dlfcn_hook) != NULL) + return GLRO (dl_dlfcn_hook)->dlopen (file, mode, RETURN_ADDRESS (0)); + else + return dlopen_implementation (file, mode, RETURN_ADDRESS (0)); +diff --git a/dlfcn/dlopenold.c b/dlfcn/dlopenold.c +index 9115501ac121eeca..c2f2a42194d50953 100644 +--- a/dlfcn/dlopenold.c ++++ b/dlfcn/dlopenold.c +@@ -70,7 +70,7 @@ __dlopen_nocheck (const char *file, int mode) + mode |= RTLD_LAZY; + args.mode = mode; + +- if (!rtld_active ()) ++ if (GLRO (dl_dlfcn_hook) != NULL) + return GLRO (dl_dlfcn_hook)->dlopen (file, mode, RETURN_ADDRESS (0)); + + return _dlerror_run (dlopen_doit, &args) ? 
NULL : args.new; +diff --git a/dlfcn/dlsym.c b/dlfcn/dlsym.c +index 43044cf7bb95801e..d3861170a7631d01 100644 +--- a/dlfcn/dlsym.c ++++ b/dlfcn/dlsym.c +@@ -62,7 +62,7 @@ dlsym_implementation (void *handle, const char *name, void *dl_caller) + void * + ___dlsym (void *handle, const char *name) + { +- if (!rtld_active ()) ++ if (GLRO (dl_dlfcn_hook) != NULL) + return GLRO (dl_dlfcn_hook)->dlsym (handle, name, RETURN_ADDRESS (0)); + else + return dlsym_implementation (handle, name, RETURN_ADDRESS (0)); +diff --git a/dlfcn/dlvsym.c b/dlfcn/dlvsym.c +index 9b76f9afa513e11f..3af02109c306b800 100644 +--- a/dlfcn/dlvsym.c ++++ b/dlfcn/dlvsym.c +@@ -65,7 +65,7 @@ dlvsym_implementation (void *handle, const char *name, const char *version, + void * + ___dlvsym (void *handle, const char *name, const char *version) + { +- if (!rtld_active ()) ++ if (GLRO (dl_dlfcn_hook) != NULL) + return GLRO (dl_dlfcn_hook)->dlvsym (handle, name, version, + RETURN_ADDRESS (0)); + else +diff --git a/elf/Makefile b/elf/Makefile +index fec6e23b5b625e3b..c89a6a58690646ee 100644 +--- a/elf/Makefile ++++ b/elf/Makefile +@@ -376,6 +376,7 @@ tests += \ + tst-audit24d \ + tst-audit25a \ + tst-audit25b \ ++ tst-audit26 \ + tst-auditmany \ + tst-auxobj \ + tst-auxobj-dlopen \ +@@ -721,6 +722,7 @@ modules-names = \ + tst-auditmod24c \ + tst-auditmod24d \ + tst-auditmod25 \ ++ tst-auditmod26 \ + tst-auxvalmod \ + tst-big-note-lib \ + tst-deep1mod1 \ +@@ -2194,6 +2196,10 @@ $(objpfx)tst-audit25b: $(objpfx)tst-audit25mod1.so \ + LDFLAGS-tst-audit25b = -Wl,-z,now + tst-audit25b-ARGS = -- $(host-test-program-cmd) + ++$(objpfx)tst-audit26.out: $(objpfx)tst-auditmod26.so ++$(objpfx)tst-auditmod26.so: $(libsupport) ++tst-audit26-ENV = LD_AUDIT=$(objpfx)tst-auditmod26.so ++ + # tst-sonamemove links against an older implementation of the library. + LDFLAGS-tst-sonamemove-linkmod1.so = \ + -Wl,--version-script=tst-sonamemove-linkmod1.map \ +diff --git a/elf/dl-libc.c b/elf/dl-libc.c +index d5bc4a277f4c6ef3..db4342a3256921f0 100644 +--- a/elf/dl-libc.c ++++ b/elf/dl-libc.c +@@ -157,7 +157,7 @@ __libc_dlopen_mode (const char *name, int mode) + args.caller_dlopen = RETURN_ADDRESS (0); + + #ifdef SHARED +- if (!rtld_active ()) ++ if (GLRO (dl_dlfcn_hook) != NULL) + return GLRO (dl_dlfcn_hook)->libc_dlopen_mode (name, mode); + #endif + return dlerror_run (do_dlopen, &args) ? NULL : (void *) args.map; +@@ -185,7 +185,7 @@ __libc_dlsym (void *map, const char *name) + args.name = name; + + #ifdef SHARED +- if (!rtld_active ()) ++ if (GLRO (dl_dlfcn_hook) != NULL) + return GLRO (dl_dlfcn_hook)->libc_dlsym (map, name); + #endif + return (dlerror_run (do_dlsym, &args) ? NULL +@@ -199,7 +199,7 @@ void * + __libc_dlvsym (void *map, const char *name, const char *version) + { + #ifdef SHARED +- if (!rtld_active ()) ++ if (GLRO (dl_dlfcn_hook) != NULL) + return GLRO (dl_dlfcn_hook)->libc_dlvsym (map, name, version); + #endif + +@@ -222,7 +222,7 @@ int + __libc_dlclose (void *map) + { + #ifdef SHARED +- if (!rtld_active ()) ++ if (GLRO (dl_dlfcn_hook) != NULL) + return GLRO (dl_dlfcn_hook)->libc_dlclose (map); + #endif + return dlerror_run (do_dlclose, map); +diff --git a/elf/tst-audit26.c b/elf/tst-audit26.c +new file mode 100644 +index 0000000000000000..3f920e83bac247a5 +--- /dev/null ++++ b/elf/tst-audit26.c +@@ -0,0 +1,35 @@ ++/* Check the usability of functions in audit modules. ++ Copyright (C) 2022 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. 
++
++   The GNU C Library is free software; you can redistribute it and/or
++   modify it under the terms of the GNU Lesser General Public
++   License as published by the Free Software Foundation; either
++   version 2.1 of the License, or (at your option) any later version.
++
++   The GNU C Library is distributed in the hope that it will be useful,
++   but WITHOUT ANY WARRANTY; without even the implied warranty of
++   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++   Lesser General Public License for more details.
++
++   You should have received a copy of the GNU Lesser General Public
++   License along with the GNU C Library; if not, see
++   <https://www.gnu.org/licenses/>.  */
++
++#include <gnu/lib-names.h>
++
++#include <support/check.h>
++#include <support/xdlfcn.h>
++
++static int
++do_test (void)
++{
++  /* Check that the audit module has been loaded.  */
++  void *handle = xdlopen ("mapped to libc", RTLD_LOCAL | RTLD_NOW);
++  TEST_VERIFY (handle
++               == xdlopen (LIBC_SO, RTLD_LOCAL | RTLD_NOW | RTLD_NOLOAD));
++
++  return 0;
++}
++
++#include <support/test-driver.c>
+diff --git a/elf/tst-auditmod26.c b/elf/tst-auditmod26.c
+new file mode 100644
+index 0000000000000000..db7ba95abec20f53
+--- /dev/null
++++ b/elf/tst-auditmod26.c
+@@ -0,0 +1,104 @@
++/* Check the usability of functions in audit modules.  Audit module.
++   Copyright (C) 2022 Free Software Foundation, Inc.
++   This file is part of the GNU C Library.
++
++   The GNU C Library is free software; you can redistribute it and/or
++   modify it under the terms of the GNU Lesser General Public
++   License as published by the Free Software Foundation; either
++   version 2.1 of the License, or (at your option) any later version.
++
++   The GNU C Library is distributed in the hope that it will be useful,
++   but WITHOUT ANY WARRANTY; without even the implied warranty of
++   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++   Lesser General Public License for more details.
++
++   You should have received a copy of the GNU Lesser General Public
++   License along with the GNU C Library; if not, see
++   <https://www.gnu.org/licenses/>.  */
++
++#include <dlfcn.h>
++#include <first-versions.h>
++#include <gnu/lib-names.h>
++#include <link.h>
++#include <stdio.h>
++#include <string.h>
++#include <unistd.h>
++
++#include <support/check.h>
++#include <support/xdlfcn.h>
++
++unsigned int
++la_version (unsigned int current)
++{
++  /* Exercise various functions.  */
++
++  /* Check dlopen, dlsym, dlclose.  */
++  void *handle = xdlopen (LIBM_SO, RTLD_LOCAL | RTLD_NOW);
++  void *ptr = xdlsym (handle, "sincos");
++  TEST_VERIFY (ptr != NULL);
++  ptr = dlsym (handle, "SINCOS");
++  TEST_VERIFY (ptr == NULL);
++  const char *message = dlerror ();
++  TEST_VERIFY (strstr (message, ": undefined symbol: SINCOS") != NULL);
++  ptr = dlsym (handle, "SINCOS");
++  TEST_VERIFY (ptr == NULL);
++  xdlclose (handle);
++  TEST_COMPARE_STRING (dlerror (), NULL);
++
++  handle = xdlopen (LIBC_SO, RTLD_LOCAL | RTLD_NOW | RTLD_NOLOAD);
++
++  /* Check dlvsym.  _exit is unlikely to gain another symbol
++     version.  */
++  TEST_VERIFY (xdlsym (handle, "_exit")
++               == xdlvsym (handle, "_exit", FIRST_VERSION_libc__exit_STRING));
++
++  /* Check dlinfo.  */
++  {
++    void *handle2 = NULL;
++    TEST_COMPARE (dlinfo (handle, RTLD_DI_LINKMAP, &handle2), 0);
++    TEST_VERIFY (handle2 == handle);
++  }
++
++  /* Check dladdr and dladdr1.  */
++  Dl_info info = { };
++  TEST_VERIFY (dladdr (&_exit, &info) != 0);
++  if (strcmp (info.dli_sname, "_Exit") != 0) /* _Exit is an alias.
*/ ++ TEST_COMPARE_STRING (info.dli_sname, "_exit"); ++ TEST_VERIFY (info.dli_saddr == &_exit); ++ TEST_VERIFY (strstr (info.dli_fname, LIBC_SO)); ++ void *extra_info; ++ memset (&info, 0, sizeof (info)); ++ TEST_VERIFY (dladdr1 (&_exit, &info, &extra_info, RTLD_DL_LINKMAP) != 0); ++ TEST_VERIFY (extra_info == handle); ++ ++ /* Verify that dlmopen creates a new namespace. */ ++ void *dlmopen_handle = xdlmopen (LM_ID_NEWLM, LIBC_SO, RTLD_NOW); ++ TEST_VERIFY (dlmopen_handle != handle); ++ memset (&info, 0, sizeof (info)); ++ extra_info = NULL; ++ ptr = xdlsym (dlmopen_handle, "_exit"); ++ TEST_VERIFY (dladdr1 (ptr, &info, &extra_info, RTLD_DL_LINKMAP) != 0); ++ TEST_VERIFY (extra_info == dlmopen_handle); ++ xdlclose (dlmopen_handle); ++ ++ /* Terminate the process with an error state. This does not happen ++ automatically because the audit module state is not shared with ++ the main program. */ ++ if (support_record_failure_is_failed ()) ++ { ++ fflush (stdout); ++ fflush (stderr); ++ _exit (1); ++ } ++ ++ return LAV_CURRENT; ++} ++ ++char * ++la_objsearch (const char *name, uintptr_t *cookie, unsigned int flag) ++{ ++ if (strcmp (name, "mapped to libc") == 0) ++ return (char *) LIBC_SO; ++ else ++ return (char *) name; ++} diff --git a/SOURCES/glibc-upstream-2.34-172.patch b/SOURCES/glibc-upstream-2.34-172.patch new file mode 100644 index 0000000..06dc695 --- /dev/null +++ b/SOURCES/glibc-upstream-2.34-172.patch @@ -0,0 +1,28 @@ +commit 83cc145830bdbefdabe03787ed884d548bea9c99 +Author: Florian Weimer +Date: Fri Apr 22 19:34:52 2022 +0200 + + scripts/glibcelf.py: Mark as UNSUPPORTED on Python 3.5 and earlier + + enum.IntFlag and enum.EnumMeta._missing_ support are not part of + earlier Python versions. + + (cherry picked from commit b571f3adffdcbed23f35ea39b0ca43809dbb4f5b) + +diff --git a/scripts/glibcelf.py b/scripts/glibcelf.py +index 8f7d0ca184845714..da0d5380f33a195e 100644 +--- a/scripts/glibcelf.py ++++ b/scripts/glibcelf.py +@@ -28,6 +28,12 @@ import collections + import enum + import struct + ++if not hasattr(enum, 'IntFlag'): ++ import sys ++ sys.stdout.write( ++ 'warning: glibcelf.py needs Python 3.6 for enum support\n') ++ sys.exit(77) ++ + class _OpenIntEnum(enum.IntEnum): + """Integer enumeration that supports arbitrary int values.""" + @classmethod diff --git a/SOURCES/glibc-upstream-2.34-173.patch b/SOURCES/glibc-upstream-2.34-173.patch new file mode 100644 index 0000000..69a92b8 --- /dev/null +++ b/SOURCES/glibc-upstream-2.34-173.patch @@ -0,0 +1,254 @@ +commit 16245986fb9bfe396113fc7dfd1929f69a9e748e +Author: H.J. Lu +Date: Fri Aug 20 06:42:24 2021 -0700 + + x86-64: Optimize load of all bits set into ZMM register [BZ #28252] + + Optimize loads of all bits set into ZMM register in AVX512 SVML codes + by replacing + + vpbroadcastq .L_2il0floatpacket.16(%rip), %zmmX + + and + + vmovups .L_2il0floatpacket.13(%rip), %zmmX + + with + vpternlogd $0xff, %zmmX, %zmmX, %zmmX + + This fixes BZ #28252. 
+ + (cherry picked from commit 78c9ec9000f873abe7a15a91b87080a2e4308260) + +diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_cos8_core_avx512.S b/sysdeps/x86_64/fpu/multiarch/svml_d_cos8_core_avx512.S +index e68fcdbb16a79f36..58e588a3d42a8bc9 100644 +--- a/sysdeps/x86_64/fpu/multiarch/svml_d_cos8_core_avx512.S ++++ b/sysdeps/x86_64/fpu/multiarch/svml_d_cos8_core_avx512.S +@@ -265,7 +265,7 @@ WRAPPER_IMPL_AVX512 _ZGVdN4v_cos + vmovaps %zmm0, %zmm8 + + /* Check for large arguments path */ +- vpbroadcastq .L_2il0floatpacket.16(%rip), %zmm2 ++ vpternlogd $0xff, %zmm2, %zmm2, %zmm2 + + /* + ARGUMENT RANGE REDUCTION: +@@ -456,8 +456,3 @@ WRAPPER_IMPL_AVX512 _ZGVdN4v_cos + jmp .LBL_2_7 + #endif + END (_ZGVeN8v_cos_skx) +- +- .section .rodata, "a" +-.L_2il0floatpacket.16: +- .long 0xffffffff,0xffffffff +- .type .L_2il0floatpacket.16,@object +diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_log8_core_avx512.S b/sysdeps/x86_64/fpu/multiarch/svml_d_log8_core_avx512.S +index dfa2acafc486b56b..f5f117d474f66176 100644 +--- a/sysdeps/x86_64/fpu/multiarch/svml_d_log8_core_avx512.S ++++ b/sysdeps/x86_64/fpu/multiarch/svml_d_log8_core_avx512.S +@@ -274,7 +274,7 @@ WRAPPER_IMPL_AVX512 _ZGVdN4v_log + + /* preserve mantissa, set input exponent to 2^(-10) */ + vpternlogq $248, _ExpMask(%rax), %zmm3, %zmm2 +- vpbroadcastq .L_2il0floatpacket.12(%rip), %zmm1 ++ vpternlogd $0xff, %zmm1, %zmm1, %zmm1 + vpsrlq $32, %zmm4, %zmm6 + + /* reciprocal approximation good to at least 11 bits */ +@@ -461,8 +461,3 @@ WRAPPER_IMPL_AVX512 _ZGVdN4v_log + jmp .LBL_2_7 + #endif + END (_ZGVeN8v_log_skx) +- +- .section .rodata, "a" +-.L_2il0floatpacket.12: +- .long 0xffffffff,0xffffffff +- .type .L_2il0floatpacket.12,@object +diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_sin8_core_avx512.S b/sysdeps/x86_64/fpu/multiarch/svml_d_sin8_core_avx512.S +index be8ab7c6e0e33819..48d251db16ccab9d 100644 +--- a/sysdeps/x86_64/fpu/multiarch/svml_d_sin8_core_avx512.S ++++ b/sysdeps/x86_64/fpu/multiarch/svml_d_sin8_core_avx512.S +@@ -261,7 +261,7 @@ WRAPPER_IMPL_AVX512 _ZGVdN4v_sin + andq $-64, %rsp + subq $1280, %rsp + movq __svml_d_trig_data@GOTPCREL(%rip), %rax +- vpbroadcastq .L_2il0floatpacket.14(%rip), %zmm14 ++ vpternlogd $0xff, %zmm1, %zmm1, %zmm14 + vmovups __dAbsMask(%rax), %zmm7 + vmovups __dInvPI(%rax), %zmm2 + vmovups __dRShifter(%rax), %zmm1 +@@ -458,8 +458,3 @@ WRAPPER_IMPL_AVX512 _ZGVdN4v_sin + jmp .LBL_2_7 + #endif + END (_ZGVeN8v_sin_skx) +- +- .section .rodata, "a" +-.L_2il0floatpacket.14: +- .long 0xffffffff,0xffffffff +- .type .L_2il0floatpacket.14,@object +diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_sincos8_core_avx512.S b/sysdeps/x86_64/fpu/multiarch/svml_d_sincos8_core_avx512.S +index 611887082a545854..a4944a4feef6aa98 100644 +--- a/sysdeps/x86_64/fpu/multiarch/svml_d_sincos8_core_avx512.S ++++ b/sysdeps/x86_64/fpu/multiarch/svml_d_sincos8_core_avx512.S +@@ -430,7 +430,7 @@ WRAPPER_IMPL_AVX512_fFF _ZGVdN4vl8l8_sincos + + /* SinPoly = SinR*SinPoly */ + vfmadd213pd %zmm5, %zmm5, %zmm4 +- vpbroadcastq .L_2il0floatpacket.15(%rip), %zmm3 ++ vpternlogd $0xff, %zmm3, %zmm3, %zmm3 + + /* Update Cos result's sign */ + vxorpd %zmm2, %zmm1, %zmm1 +@@ -741,8 +741,3 @@ END (_ZGVeN8vvv_sincos_knl) + ENTRY (_ZGVeN8vvv_sincos_skx) + WRAPPER_AVX512_vvv_vl8l8 _ZGVeN8vl8l8_sincos_skx + END (_ZGVeN8vvv_sincos_skx) +- +- .section .rodata, "a" +-.L_2il0floatpacket.15: +- .long 0xffffffff,0xffffffff +- .type .L_2il0floatpacket.15,@object +diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_cosf16_core_avx512.S 
b/sysdeps/x86_64/fpu/multiarch/svml_s_cosf16_core_avx512.S +index f671d60d5dab5a0e..fe8474fed943e8ad 100644 +--- a/sysdeps/x86_64/fpu/multiarch/svml_s_cosf16_core_avx512.S ++++ b/sysdeps/x86_64/fpu/multiarch/svml_s_cosf16_core_avx512.S +@@ -278,7 +278,7 @@ WRAPPER_IMPL_AVX512 _ZGVdN8v_cosf + X = X - Y*PI1 - Y*PI2 - Y*PI3 + */ + vmovaps %zmm0, %zmm6 +- vmovups .L_2il0floatpacket.13(%rip), %zmm12 ++ vpternlogd $0xff, %zmm12, %zmm12, %zmm12 + vmovups __sRShifter(%rax), %zmm3 + vmovups __sPI1_FMA(%rax), %zmm5 + vmovups __sA9_FMA(%rax), %zmm9 +@@ -453,8 +453,3 @@ WRAPPER_IMPL_AVX512 _ZGVdN8v_cosf + jmp .LBL_2_7 + #endif + END (_ZGVeN16v_cosf_skx) +- +- .section .rodata, "a" +-.L_2il0floatpacket.13: +- .long 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff +- .type .L_2il0floatpacket.13,@object +diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_expf16_core_avx512.S b/sysdeps/x86_64/fpu/multiarch/svml_s_expf16_core_avx512.S +index 637bfe3c06ab9ad4..229b7828cde04db2 100644 +--- a/sysdeps/x86_64/fpu/multiarch/svml_s_expf16_core_avx512.S ++++ b/sysdeps/x86_64/fpu/multiarch/svml_s_expf16_core_avx512.S +@@ -264,7 +264,7 @@ WRAPPER_IMPL_AVX512 _ZGVdN8v_expf + vmovaps %zmm0, %zmm7 + + /* compare against threshold */ +- vmovups .L_2il0floatpacket.13(%rip), %zmm3 ++ vpternlogd $0xff, %zmm3, %zmm3, %zmm3 + vmovups __sInvLn2(%rax), %zmm4 + vmovups __sShifter(%rax), %zmm1 + vmovups __sLn2hi(%rax), %zmm6 +@@ -440,8 +440,3 @@ WRAPPER_IMPL_AVX512 _ZGVdN8v_expf + + #endif + END (_ZGVeN16v_expf_skx) +- +- .section .rodata, "a" +-.L_2il0floatpacket.13: +- .long 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff +- .type .L_2il0floatpacket.13,@object +diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_logf16_core_avx512.S b/sysdeps/x86_64/fpu/multiarch/svml_s_logf16_core_avx512.S +index 9d790fbf0ad6c8ec..fa2aae986f543582 100644 +--- a/sysdeps/x86_64/fpu/multiarch/svml_s_logf16_core_avx512.S ++++ b/sysdeps/x86_64/fpu/multiarch/svml_s_logf16_core_avx512.S +@@ -235,7 +235,7 @@ WRAPPER_IMPL_AVX512 _ZGVdN8v_logf + andq $-64, %rsp + subq $1280, %rsp + movq __svml_slog_data@GOTPCREL(%rip), %rax +- vmovups .L_2il0floatpacket.7(%rip), %zmm6 ++ vpternlogd $0xff, %zmm6, %zmm6, %zmm6 + vmovups _iBrkValue(%rax), %zmm4 + vmovups _sPoly_7(%rax), %zmm8 + +@@ -409,8 +409,3 @@ WRAPPER_IMPL_AVX512 _ZGVdN8v_logf + + #endif + END (_ZGVeN16v_logf_skx) +- +- .section .rodata, "a" +-.L_2il0floatpacket.7: +- .long 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff +- .type .L_2il0floatpacket.7,@object +diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_powf16_core_avx512.S b/sysdeps/x86_64/fpu/multiarch/svml_s_powf16_core_avx512.S +index c5c43c46ff7af5a3..6aea2a4f11d1f85f 100644 +--- a/sysdeps/x86_64/fpu/multiarch/svml_s_powf16_core_avx512.S ++++ b/sysdeps/x86_64/fpu/multiarch/svml_s_powf16_core_avx512.S +@@ -385,7 +385,7 @@ WRAPPER_IMPL_AVX512_ff _ZGVdN8vv_powf + vpsrlq $32, %zmm3, %zmm2 + vpmovqd %zmm2, %ymm11 + vcvtps2pd %ymm14, %zmm13 +- vmovups .L_2il0floatpacket.23(%rip), %zmm14 ++ vpternlogd $0xff, %zmm14, %zmm14, %zmm14 + vmovaps %zmm14, %zmm26 + vpandd _ABSMASK(%rax), %zmm1, %zmm8 + vpcmpd $1, _INF(%rax), %zmm8, %k2 +@@ -427,7 +427,7 @@ WRAPPER_IMPL_AVX512_ff 
_ZGVdN8vv_powf
+	vpmovqd %zmm11, %ymm5
+	vpxord %zmm10, %zmm10, %zmm10
+	vgatherdpd _Log2Rcp_lookup(%rax,%ymm4), %zmm10{%k3}
+-	vpbroadcastq .L_2il0floatpacket.24(%rip), %zmm4
++	vpternlogd $0xff, %zmm4, %zmm4, %zmm4
+	vpxord %zmm11, %zmm11, %zmm11
+	vcvtdq2pd %ymm7, %zmm7
+	vgatherdpd _Log2Rcp_lookup(%rax,%ymm5), %zmm11{%k1}
+@@ -643,11 +643,3 @@ WRAPPER_IMPL_AVX512_ff _ZGVdN8vv_powf
+	jmp .LBL_2_7
+ #endif
+ END (_ZGVeN16vv_powf_skx)
+-
+-	.section .rodata, "a"
+-.L_2il0floatpacket.23:
+-	.long 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff
+-	.type .L_2il0floatpacket.23,@object
+-.L_2il0floatpacket.24:
+-	.long 0xffffffff,0xffffffff
+-	.type .L_2il0floatpacket.24,@object
+diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf16_core_avx512.S b/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf16_core_avx512.S
+index 9cf359c86ff9bd70..a446c504f63c9399 100644
+--- a/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf16_core_avx512.S
++++ b/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf16_core_avx512.S
+@@ -317,7 +317,7 @@ WRAPPER_IMPL_AVX512_fFF _ZGVdN8vvv_sincosf
+ 
+	/* Result sign calculations */
+	vpternlogd $150, %zmm0, %zmm14, %zmm1
+-	vmovups .L_2il0floatpacket.13(%rip), %zmm14
++	vpternlogd $0xff, %zmm14, %zmm14, %zmm14
+ 
+	/* Add correction term 0.5 for cos() part */
+	vaddps %zmm8, %zmm5, %zmm15
+@@ -748,8 +748,3 @@ END (_ZGVeN16vvv_sincosf_knl)
+ ENTRY (_ZGVeN16vvv_sincosf_skx)
+ WRAPPER_AVX512_vvv_vl4l4 _ZGVeN16vl4l4_sincosf_skx
+ END (_ZGVeN16vvv_sincosf_skx)
+-
+-	.section .rodata, "a"
+-.L_2il0floatpacket.13:
+-	.long 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff
+-	.type .L_2il0floatpacket.13,@object
+diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_sinf16_core_avx512.S b/sysdeps/x86_64/fpu/multiarch/svml_s_sinf16_core_avx512.S
+index bd05109a62181f22..c1b352d0ad1992cd 100644
+--- a/sysdeps/x86_64/fpu/multiarch/svml_s_sinf16_core_avx512.S
++++ b/sysdeps/x86_64/fpu/multiarch/svml_s_sinf16_core_avx512.S
+@@ -280,7 +280,7 @@ WRAPPER_IMPL_AVX512 _ZGVdN8v_sinf
+	movq __svml_s_trig_data@GOTPCREL(%rip), %rax
+ 
+	/* Check for large and special values */
+-	vmovups .L_2il0floatpacket.11(%rip), %zmm14
++	vpternlogd $0xff, %zmm14, %zmm14, %zmm14
+	vmovups __sAbsMask(%rax), %zmm5
+	vmovups __sInvPI(%rax), %zmm1
+	vmovups __sRShifter(%rax), %zmm2
+@@ -472,8 +472,3 @@ WRAPPER_IMPL_AVX512 _ZGVdN8v_sinf
+	jmp .LBL_2_7
+ #endif
+ END (_ZGVeN16v_sinf_skx)
+-
+-	.section .rodata, "a"
+-.L_2il0floatpacket.11:
+-	.long 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff
+-	.type .L_2il0floatpacket.11,@object
diff --git a/SOURCES/glibc-upstream-2.34-174.patch b/SOURCES/glibc-upstream-2.34-174.patch
new file mode 100644
index 0000000..3bf44a8
--- /dev/null
+++ b/SOURCES/glibc-upstream-2.34-174.patch
@@ -0,0 +1,42 @@
+commit b5a44a6a471aafd3677659a610f32468c40a666b
+Author: Noah Goldstein <goldstein.w.n@gmail.com>
+Date:   Tue Sep 21 18:31:49 2021 -0500
+
+    x86: Modify ENTRY in sysdep.h so that p2align can be specified
+
+    No bug.
+
+    This change adds a new macro ENTRY_P2ALIGN which takes a second
+    argument, log2 of the desired function alignment.
+
+    The old ENTRY(name) macro is just ENTRY_P2ALIGN(name, 4) so this
+    doesn't affect any existing functionality.
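Note that the second argument is the power-of-two exponent consumed by
.align/ALIGNARG, not a byte count.  A trivial, purely illustrative C check of
the arithmetic (not code from the patch):

    #include <assert.h>

    int
    main (void)
    {
      /* ENTRY (name) == ENTRY_P2ALIGN (name, 4): 2^4 = 16 bytes, the
         historical x86 entry-point alignment.  */
      assert ((1 << 4) == 16);
      /* ENTRY_P2ALIGN (name, 6): 2^6 = 64 bytes, a full cache line.  */
      assert ((1 << 6) == 64);
      return 0;
    }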
+
+    Signed-off-by: Noah Goldstein <goldstein.w.n@gmail.com>
+    (cherry picked from commit fc5bd179ef3a953dff8d1655bd530d0e230ffe71)
+
+diff --git a/sysdeps/x86/sysdep.h b/sysdeps/x86/sysdep.h
+index cac1d762fb3f99d0..937180c1bd791570 100644
+--- a/sysdeps/x86/sysdep.h
++++ b/sysdeps/x86/sysdep.h
+@@ -78,15 +78,18 @@ enum cf_protection_level
+ #define ASM_SIZE_DIRECTIVE(name) .size name,.-name;
+ 
+ /* Define an entry point visible from C.  */
+-#define ENTRY(name) \
++#define ENTRY_P2ALIGN(name, alignment) \
+   .globl C_SYMBOL_NAME(name); \
+   .type C_SYMBOL_NAME(name),@function; \
+-  .align ALIGNARG(4); \
++  .align ALIGNARG(alignment); \
+   C_LABEL(name) \
+   cfi_startproc; \
+   _CET_ENDBR; \
+   CALL_MCOUNT
+ 
++/* Common entry 16 byte aligns.  */
++#define ENTRY(name) ENTRY_P2ALIGN (name, 4)
++
+ #undef END
+ #define END(name) \
+   cfi_endproc; \
diff --git a/SOURCES/glibc-upstream-2.34-175.patch b/SOURCES/glibc-upstream-2.34-175.patch
new file mode 100644
index 0000000..5ebf0b7
--- /dev/null
+++ b/SOURCES/glibc-upstream-2.34-175.patch
@@ -0,0 +1,653 @@
+commit 5ec3416853c4150c4d13312e05f93a053586d528
+Author: Noah Goldstein <goldstein.w.n@gmail.com>
+Date:   Tue Sep 21 18:45:03 2021 -0500
+
+    x86: Optimize memcmp-evex-movbe.S for frontend behavior and size
+
+    No bug.
+
+    The frontend optimizations are to:
+    1. Reorganize logically connected basic blocks so they are either in
+       the same cache line or adjacent cache lines.
+    2. Avoid cases when basic blocks unnecessarily cross cache lines.
+    3. Try and 32 byte align any basic blocks possible without sacrificing
+       code size.  Smaller / less hot basic blocks are used for this.
+
+    Overall code size shrunk by 168 bytes.  This should make up for any
+    extra costs due to aligning to 64 bytes.
+
+    In general performance before deviated a great deal depending on
+    whether entry alignment % 64 was 0, 16, 32, or 48.  These changes
+    essentially make it so that the current implementation is at least
+    equal to the best alignment of the original for any arguments.
+
+    The only additional optimization is in the page cross case.  Branch on
+    equals case was removed from the size == [4, 7] case.  As well the [4,
+    7] and [2, 3] case were swapped as [4, 7] is likely a more hot
+    argument size.
+
+    test-memcmp and test-wmemcmp are both passing.
+
+    (cherry picked from commit 1bd8b8d58fc9967cc073d2c13bfb6befefca2faa)
+
+diff --git a/sysdeps/x86_64/multiarch/memcmp-evex-movbe.S b/sysdeps/x86_64/multiarch/memcmp-evex-movbe.S
+index 654dc7ac8ccb9445..2761b54f2e7dea9f 100644
+--- a/sysdeps/x86_64/multiarch/memcmp-evex-movbe.S
++++ b/sysdeps/x86_64/multiarch/memcmp-evex-movbe.S
+@@ -34,7 +34,24 @@
+    area.
+    7. Use 2 vector compares when size is 2 * CHAR_PER_VEC or less.
+    8. Use 4 vector compares when size is 4 * CHAR_PER_VEC or less.
+-   9. Use 8 vector compares when size is 8 * CHAR_PER_VEC or less.  */
++   9. Use 8 vector compares when size is 8 * CHAR_PER_VEC or less.
++
++When possible the implementation tries to optimize for frontend in the
++following ways:
++Throughput:
++    1. All code sections that fit are able to run optimally out of the
++       LSD.
++    2. All code sections that fit are able to run optimally out of the
++       DSB
++    3. Basic blocks are contained in minimum number of fetch blocks
++       necessary.
++
++Latency:
++    1. Logically connected basic blocks are put in the same
++       cache-line.
++    2. Logically connected basic blocks that do not fit in the same
++       cache-line are put in adjacent lines.  This can get beneficial
++       L2 spatial prefetching and L1 next-line prefetching.  */
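The page-cross [4, 7] byte path mentioned in the commit message above relies
on two overlapping big-endian loads so no branch on the exact length is
needed.  A C sketch of the idea (hypothetical helper names, GCC builtins
assumed; the branchless sign computation in the real code differs in detail):

    #include <stdint.h>
    #include <string.h>

    /* Big-endian 4-byte load, as movbe (or mov + bswap) performs it.  */
    static inline uint32_t
    load32_be (const unsigned char *p)
    {
      uint32_t v;
      memcpy (&v, p, 4);
      return __builtin_bswap32 (v);
    }

    /* Compare 4 to 7 bytes without branching on the exact length: the two
       4-byte loads overlap in the middle, so together they cover all n
       bytes in lexicographic significance order.  */
    static inline int
    cmp_4_7 (const unsigned char *s1, const unsigned char *s2, size_t n)
    {
      uint64_t a = ((uint64_t) load32_be (s1) << 32) | load32_be (s1 + n - 4);
      uint64_t b = ((uint64_t) load32_be (s2) << 32) | load32_be (s2 + n - 4);
      return (a > b) - (a < b);
    }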
+ 
+ # include <sysdep.h>
+ 
+@@ -47,9 +64,11 @@
+ # ifdef USE_AS_WMEMCMP
+ #  define CHAR_SIZE 4
+ #  define VPCMP vpcmpd
++#  define VPTEST vptestmd
+ # else
+ #  define CHAR_SIZE 1
+ #  define VPCMP vpcmpub
++#  define VPTEST vptestmb
+ # endif
+ 
+ # define VEC_SIZE 32
+@@ -75,7 +94,9 @@
+  */
+ 
+	.section .text.evex,"ax",@progbits
+-ENTRY (MEMCMP)
++/* Cache align memcmp entry.  This allows for much more thorough
++   frontend optimization.  */
++ENTRY_P2ALIGN (MEMCMP, 6)
+ # ifdef __ILP32__
+	/* Clear the upper 32 bits.  */
+	movl %edx, %edx
+@@ -89,7 +110,7 @@ ENTRY (MEMCMP)
+	VPCMP $4, (%rdi), %YMM1, %k1
+	kmovd %k1, %eax
+	/* NB: eax must be destination register if going to
+-	   L(return_vec_[0,2]). For L(return_vec_3 destination register
++	   L(return_vec_[0,2]). For L(return_vec_3) destination register
+	   must be ecx.  */
+	testl %eax, %eax
+	jnz L(return_vec_0)
+@@ -121,10 +142,6 @@ ENTRY (MEMCMP)
+	testl %ecx, %ecx
+	jnz L(return_vec_3)
+ 
+-	/* Zero YMM0. 4x VEC reduction is done with vpxor + vtern so
+-	   compare with zero to get a mask is needed.  */
+-	vpxorq %XMM0, %XMM0, %XMM0
+-
+	/* Go to 4x VEC loop.  */
+	cmpq $(CHAR_PER_VEC * 8), %rdx
+	ja L(more_8x_vec)
+@@ -148,47 +165,61 @@ ENTRY (MEMCMP)
+ 
+	VMOVU (VEC_SIZE * 2)(%rsi), %YMM3
+	vpxorq (VEC_SIZE * 2)(%rdi), %YMM3, %YMM3
+-	/* Or together YMM1, YMM2, and YMM3 into YMM3.  */
+-	vpternlogd $0xfe, %YMM1, %YMM2, %YMM3
+ 
+	VMOVU (VEC_SIZE * 3)(%rsi), %YMM4
+	/* Ternary logic to xor (VEC_SIZE * 3)(%rdi) with YMM4 while
+-	   oring with YMM3. Result is stored in YMM4.  */
+-	vpternlogd $0xde, (VEC_SIZE * 3)(%rdi), %YMM3, %YMM4
+-	/* Compare YMM4 with 0. If any 1s s1 and s2 don't match.  */
+-	VPCMP $4, %YMM4, %YMM0, %k1
++	   oring with YMM1. Result is stored in YMM4.  */
++	vpternlogd $0xde, (VEC_SIZE * 3)(%rdi), %YMM1, %YMM4
++
++	/* Or together YMM2, YMM3, and YMM4 into YMM4.  */
++	vpternlogd $0xfe, %YMM2, %YMM3, %YMM4
++
++	/* Test YMM4 against itself. Store any CHAR mismatches in k1.
++	 */
++	VPTEST %YMM4, %YMM4, %k1
++	/* k1 must go to ecx for L(return_vec_0_1_2_3).  */
+	kmovd %k1, %ecx
+	testl %ecx, %ecx
+	jnz L(return_vec_0_1_2_3)
+	/* NB: eax must be zero to reach here.  */
+	ret
+ 
+-	/* NB: aligning 32 here allows for the rest of the jump targets
+-	   to be tuned for 32 byte alignment. Most important this ensures
+-	   the L(more_8x_vec) loop is 32 byte aligned.  */
+-	.p2align 5
+-L(less_vec):
+-	/* Check if one or less CHAR. This is necessary for size = 0 but
+-	   is also faster for size = CHAR_SIZE.  */
+-	cmpl $1, %edx
+-	jbe L(one_or_less)
++	.p2align 4
++L(8x_end_return_vec_0_1_2_3):
++	movq %rdx, %rdi
++L(8x_return_vec_0_1_2_3):
++	addq %rdi, %rsi
++L(return_vec_0_1_2_3):
++	VPTEST %YMM1, %YMM1, %k0
++	kmovd %k0, %eax
++	testl %eax, %eax
++	jnz L(return_vec_0)
+ 
+-	/* Check if loading one VEC from either s1 or s2 could cause a
+-	   page cross. This can have false positives but is by far the
+-	   fastest method.  */
+-	movl %edi, %eax
+-	orl %esi, %eax
+-	andl $(PAGE_SIZE - 1), %eax
+-	cmpl $(PAGE_SIZE - VEC_SIZE), %eax
+-	jg L(page_cross_less_vec)
++	VPTEST %YMM2, %YMM2, %k0
++	kmovd %k0, %eax
++	testl %eax, %eax
++	jnz L(return_vec_1)
+ 
+-	/* No page cross possible.  */
+-	VMOVU (%rsi), %YMM2
+-	VPCMP $4, (%rdi), %YMM2, %k1
+-	kmovd %k1, %eax
+-	/* Create mask in ecx for potentially in bound matches.  */
+-	bzhil %edx, %eax, %eax
+-	jnz L(return_vec_0)
++	VPTEST %YMM3, %YMM3, %k0
++	kmovd %k0, %eax
++	testl %eax, %eax
++	jnz L(return_vec_2)
++L(return_vec_3):
++	/* bsf saves 1 byte from tzcnt. 
This keep L(return_vec_3) in one ++ fetch block and the entire L(*return_vec_0_1_2_3) in 1 cache ++ line. */ ++ bsfl %ecx, %ecx ++# ifdef USE_AS_WMEMCMP ++ movl (VEC_SIZE * 3)(%rdi, %rcx, CHAR_SIZE), %eax ++ xorl %edx, %edx ++ cmpl (VEC_SIZE * 3)(%rsi, %rcx, CHAR_SIZE), %eax ++ setg %dl ++ leal -1(%rdx, %rdx), %eax ++# else ++ movzbl (VEC_SIZE * 3)(%rdi, %rcx), %eax ++ movzbl (VEC_SIZE * 3)(%rsi, %rcx), %ecx ++ subl %ecx, %eax ++# endif + ret + + .p2align 4 +@@ -209,10 +240,11 @@ L(return_vec_0): + # endif + ret + +- /* NB: No p2align necessary. Alignment % 16 is naturally 1 +- which is good enough for a target not in a loop. */ ++ .p2align 4 + L(return_vec_1): +- tzcntl %eax, %eax ++ /* bsf saves 1 byte over tzcnt and keeps L(return_vec_1) in one ++ fetch block. */ ++ bsfl %eax, %eax + # ifdef USE_AS_WMEMCMP + movl VEC_SIZE(%rdi, %rax, CHAR_SIZE), %ecx + xorl %edx, %edx +@@ -226,10 +258,11 @@ L(return_vec_1): + # endif + ret + +- /* NB: No p2align necessary. Alignment % 16 is naturally 2 +- which is good enough for a target not in a loop. */ ++ .p2align 4,, 10 + L(return_vec_2): +- tzcntl %eax, %eax ++ /* bsf saves 1 byte over tzcnt and keeps L(return_vec_2) in one ++ fetch block. */ ++ bsfl %eax, %eax + # ifdef USE_AS_WMEMCMP + movl (VEC_SIZE * 2)(%rdi, %rax, CHAR_SIZE), %ecx + xorl %edx, %edx +@@ -243,40 +276,6 @@ L(return_vec_2): + # endif + ret + +- .p2align 4 +-L(8x_return_vec_0_1_2_3): +- /* Returning from L(more_8x_vec) requires restoring rsi. */ +- addq %rdi, %rsi +-L(return_vec_0_1_2_3): +- VPCMP $4, %YMM1, %YMM0, %k0 +- kmovd %k0, %eax +- testl %eax, %eax +- jnz L(return_vec_0) +- +- VPCMP $4, %YMM2, %YMM0, %k0 +- kmovd %k0, %eax +- testl %eax, %eax +- jnz L(return_vec_1) +- +- VPCMP $4, %YMM3, %YMM0, %k0 +- kmovd %k0, %eax +- testl %eax, %eax +- jnz L(return_vec_2) +-L(return_vec_3): +- tzcntl %ecx, %ecx +-# ifdef USE_AS_WMEMCMP +- movl (VEC_SIZE * 3)(%rdi, %rcx, CHAR_SIZE), %eax +- xorl %edx, %edx +- cmpl (VEC_SIZE * 3)(%rsi, %rcx, CHAR_SIZE), %eax +- setg %dl +- leal -1(%rdx, %rdx), %eax +-# else +- movzbl (VEC_SIZE * 3)(%rdi, %rcx), %eax +- movzbl (VEC_SIZE * 3)(%rsi, %rcx), %ecx +- subl %ecx, %eax +-# endif +- ret +- + .p2align 4 + L(more_8x_vec): + /* Set end of s1 in rdx. */ +@@ -288,21 +287,19 @@ L(more_8x_vec): + andq $-VEC_SIZE, %rdi + /* Adjust because first 4x vec where check already. 
*/ + subq $-(VEC_SIZE * 4), %rdi ++ + .p2align 4 + L(loop_4x_vec): + VMOVU (%rsi, %rdi), %YMM1 + vpxorq (%rdi), %YMM1, %YMM1 +- + VMOVU VEC_SIZE(%rsi, %rdi), %YMM2 + vpxorq VEC_SIZE(%rdi), %YMM2, %YMM2 +- + VMOVU (VEC_SIZE * 2)(%rsi, %rdi), %YMM3 + vpxorq (VEC_SIZE * 2)(%rdi), %YMM3, %YMM3 +- vpternlogd $0xfe, %YMM1, %YMM2, %YMM3 +- + VMOVU (VEC_SIZE * 3)(%rsi, %rdi), %YMM4 +- vpternlogd $0xde, (VEC_SIZE * 3)(%rdi), %YMM3, %YMM4 +- VPCMP $4, %YMM4, %YMM0, %k1 ++ vpternlogd $0xde, (VEC_SIZE * 3)(%rdi), %YMM1, %YMM4 ++ vpternlogd $0xfe, %YMM2, %YMM3, %YMM4 ++ VPTEST %YMM4, %YMM4, %k1 + kmovd %k1, %ecx + testl %ecx, %ecx + jnz L(8x_return_vec_0_1_2_3) +@@ -319,28 +316,25 @@ L(loop_4x_vec): + cmpl $(VEC_SIZE * 2), %edi + jae L(8x_last_2x_vec) + ++ vpxorq (VEC_SIZE * 2)(%rdx), %YMM3, %YMM3 ++ + VMOVU (%rsi, %rdx), %YMM1 + vpxorq (%rdx), %YMM1, %YMM1 + + VMOVU VEC_SIZE(%rsi, %rdx), %YMM2 + vpxorq VEC_SIZE(%rdx), %YMM2, %YMM2 +- +- vpxorq (VEC_SIZE * 2)(%rdx), %YMM3, %YMM3 +- vpternlogd $0xfe, %YMM1, %YMM2, %YMM3 +- + VMOVU (VEC_SIZE * 3)(%rsi, %rdx), %YMM4 +- vpternlogd $0xde, (VEC_SIZE * 3)(%rdx), %YMM3, %YMM4 +- VPCMP $4, %YMM4, %YMM0, %k1 ++ vpternlogd $0xde, (VEC_SIZE * 3)(%rdx), %YMM1, %YMM4 ++ vpternlogd $0xfe, %YMM2, %YMM3, %YMM4 ++ VPTEST %YMM4, %YMM4, %k1 + kmovd %k1, %ecx +- /* Restore s1 pointer to rdi. */ +- movq %rdx, %rdi + testl %ecx, %ecx +- jnz L(8x_return_vec_0_1_2_3) ++ jnz L(8x_end_return_vec_0_1_2_3) + /* NB: eax must be zero to reach here. */ + ret + + /* Only entry is from L(more_8x_vec). */ +- .p2align 4 ++ .p2align 4,, 10 + L(8x_last_2x_vec): + VPCMP $4, (VEC_SIZE * 2)(%rdx), %YMM3, %k1 + kmovd %k1, %eax +@@ -355,7 +349,31 @@ L(8x_last_1x_vec): + jnz L(8x_return_vec_3) + ret + +- .p2align 4 ++ /* Not ideally aligned (at offset +9 bytes in fetch block) but ++ not aligning keeps it in the same cache line as ++ L(8x_last_1x/2x_vec) so likely worth it. As well, saves code ++ size. */ ++ .p2align 4,, 4 ++L(8x_return_vec_2): ++ subq $VEC_SIZE, %rdx ++L(8x_return_vec_3): ++ bsfl %eax, %eax ++# ifdef USE_AS_WMEMCMP ++ leaq (%rdx, %rax, CHAR_SIZE), %rax ++ movl (VEC_SIZE * 3)(%rax), %ecx ++ xorl %edx, %edx ++ cmpl (VEC_SIZE * 3)(%rsi, %rax), %ecx ++ setg %dl ++ leal -1(%rdx, %rdx), %eax ++# else ++ addq %rdx, %rax ++ movzbl (VEC_SIZE * 3)(%rsi, %rax), %ecx ++ movzbl (VEC_SIZE * 3)(%rax), %eax ++ subl %ecx, %eax ++# endif ++ ret ++ ++ .p2align 4,, 10 + L(last_2x_vec): + /* Check second to last VEC. */ + VMOVU -(VEC_SIZE * 2)(%rsi, %rdx, CHAR_SIZE), %YMM1 +@@ -374,26 +392,49 @@ L(last_1x_vec): + jnz L(return_vec_0_end) + ret + +- .p2align 4 +-L(8x_return_vec_2): +- subq $VEC_SIZE, %rdx +-L(8x_return_vec_3): +- tzcntl %eax, %eax ++ .p2align 4,, 10 ++L(return_vec_1_end): ++ /* Use bsf to save code size. This is necessary to have ++ L(one_or_less) fit in aligning bytes between. */ ++ bsfl %eax, %eax ++ addl %edx, %eax + # ifdef USE_AS_WMEMCMP +- leaq (%rdx, %rax, CHAR_SIZE), %rax +- movl (VEC_SIZE * 3)(%rax), %ecx ++ movl -(VEC_SIZE * 2)(%rdi, %rax, CHAR_SIZE), %ecx + xorl %edx, %edx +- cmpl (VEC_SIZE * 3)(%rsi, %rax), %ecx ++ cmpl -(VEC_SIZE * 2)(%rsi, %rax, CHAR_SIZE), %ecx + setg %dl + leal -1(%rdx, %rdx), %eax + # else +- addq %rdx, %rax +- movzbl (VEC_SIZE * 3)(%rsi, %rax), %ecx +- movzbl (VEC_SIZE * 3)(%rax), %eax ++ movzbl -(VEC_SIZE * 2)(%rsi, %rax), %ecx ++ movzbl -(VEC_SIZE * 2)(%rdi, %rax), %eax + subl %ecx, %eax + # endif + ret + ++ /* NB: L(one_or_less) fits in alignment padding between ++ L(return_vec_1_end) and L(return_vec_0_end). 
*/ ++# ifdef USE_AS_WMEMCMP ++L(one_or_less): ++ jb L(zero) ++ movl (%rdi), %ecx ++ xorl %edx, %edx ++ cmpl (%rsi), %ecx ++ je L(zero) ++ setg %dl ++ leal -1(%rdx, %rdx), %eax ++ ret ++# else ++L(one_or_less): ++ jb L(zero) ++ movzbl (%rsi), %ecx ++ movzbl (%rdi), %eax ++ subl %ecx, %eax ++ ret ++# endif ++L(zero): ++ xorl %eax, %eax ++ ret ++ + .p2align 4 + L(return_vec_0_end): + tzcntl %eax, %eax +@@ -412,23 +453,56 @@ L(return_vec_0_end): + ret + + .p2align 4 +-L(return_vec_1_end): ++L(less_vec): ++ /* Check if one or less CHAR. This is necessary for size == 0 ++ but is also faster for size == CHAR_SIZE. */ ++ cmpl $1, %edx ++ jbe L(one_or_less) ++ ++ /* Check if loading one VEC from either s1 or s2 could cause a ++ page cross. This can have false positives but is by far the ++ fastest method. */ ++ movl %edi, %eax ++ orl %esi, %eax ++ andl $(PAGE_SIZE - 1), %eax ++ cmpl $(PAGE_SIZE - VEC_SIZE), %eax ++ jg L(page_cross_less_vec) ++ ++ /* No page cross possible. */ ++ VMOVU (%rsi), %YMM2 ++ VPCMP $4, (%rdi), %YMM2, %k1 ++ kmovd %k1, %eax ++ /* Check if any matches where in bounds. Intentionally not ++ storing result in eax to limit dependency chain if it goes to ++ L(return_vec_0_lv). */ ++ bzhil %edx, %eax, %edx ++ jnz L(return_vec_0_lv) ++ xorl %eax, %eax ++ ret ++ ++ /* Essentially duplicate of L(return_vec_0). Ends up not costing ++ any code as shrinks L(less_vec) by allowing 2-byte encoding of ++ the jump and ends up fitting in aligning bytes. As well fits on ++ same cache line as L(less_vec) so also saves a line from having ++ to be fetched on cold calls to memcmp. */ ++ .p2align 4,, 4 ++L(return_vec_0_lv): + tzcntl %eax, %eax +- addl %edx, %eax + # ifdef USE_AS_WMEMCMP +- movl -(VEC_SIZE * 2)(%rdi, %rax, CHAR_SIZE), %ecx ++ movl (%rdi, %rax, CHAR_SIZE), %ecx + xorl %edx, %edx +- cmpl -(VEC_SIZE * 2)(%rsi, %rax, CHAR_SIZE), %ecx ++ cmpl (%rsi, %rax, CHAR_SIZE), %ecx ++ /* NB: no partial register stall here because xorl zero idiom ++ above. */ + setg %dl + leal -1(%rdx, %rdx), %eax + # else +- movzbl -(VEC_SIZE * 2)(%rsi, %rax), %ecx +- movzbl -(VEC_SIZE * 2)(%rdi, %rax), %eax ++ movzbl (%rsi, %rax), %ecx ++ movzbl (%rdi, %rax), %eax + subl %ecx, %eax + # endif + ret + +- + .p2align 4 + L(page_cross_less_vec): + /* if USE_AS_WMEMCMP it can only be 0, 4, 8, 12, 16, 20, 24, 28 +@@ -439,108 +513,84 @@ L(page_cross_less_vec): + cmpl $8, %edx + jae L(between_8_15) + cmpl $4, %edx +- jae L(between_4_7) +-L(between_2_3): +- /* Load as big endian to avoid branches. */ +- movzwl (%rdi), %eax +- movzwl (%rsi), %ecx +- shll $8, %eax +- shll $8, %ecx +- bswap %eax +- bswap %ecx +- movzbl -1(%rdi, %rdx), %edi +- movzbl -1(%rsi, %rdx), %esi +- orl %edi, %eax +- orl %esi, %ecx +- /* Subtraction is okay because the upper 8 bits are zero. */ +- subl %ecx, %eax +- ret +- .p2align 4 +-L(one_or_less): +- jb L(zero) +- movzbl (%rsi), %ecx +- movzbl (%rdi), %eax +- subl %ecx, %eax ++ jb L(between_2_3) ++ ++ /* Load as big endian with overlapping movbe to avoid branches. ++ */ ++ movbe (%rdi), %eax ++ movbe (%rsi), %ecx ++ shlq $32, %rax ++ shlq $32, %rcx ++ movbe -4(%rdi, %rdx), %edi ++ movbe -4(%rsi, %rdx), %esi ++ orq %rdi, %rax ++ orq %rsi, %rcx ++ subq %rcx, %rax ++ /* edx is guranteed to be positive int32 in range [4, 7]. */ ++ cmovne %edx, %eax ++ /* ecx is -1 if rcx > rax. Otherwise 0. */ ++ sbbl %ecx, %ecx ++ /* If rcx > rax, then ecx is 0 and eax is positive. If rcx == ++ rax then eax and ecx are zero. If rax < rax then ecx is -1 so ++ eax doesn't matter. 
*/
++	orl %ecx, %eax
++	ret
+ 
+-	.p2align 4
++	.p2align 4,, 8
+ L(between_8_15):
+ # endif
+	/* If USE_AS_WMEMCMP fall through into 8-15 byte case.  */
+-	vmovq (%rdi), %XMM1
+-	vmovq (%rsi), %XMM2
+-	VPCMP $4, %XMM1, %XMM2, %k1
++	vmovq (%rdi), %xmm1
++	vmovq (%rsi), %xmm2
++	VPCMP $4, %xmm1, %xmm2, %k1
+	kmovd %k1, %eax
+	testl %eax, %eax
+-	jnz L(return_vec_0)
++	jnz L(return_vec_0_lv)
+	/* Use overlapping loads to avoid branches.  */
+-	leaq -8(%rdi, %rdx, CHAR_SIZE), %rdi
+-	leaq -8(%rsi, %rdx, CHAR_SIZE), %rsi
+-	vmovq (%rdi), %XMM1
+-	vmovq (%rsi), %XMM2
+-	VPCMP $4, %XMM1, %XMM2, %k1
++	vmovq -8(%rdi, %rdx, CHAR_SIZE), %xmm1
++	vmovq -8(%rsi, %rdx, CHAR_SIZE), %xmm2
++	VPCMP $4, %xmm1, %xmm2, %k1
++	addl $(CHAR_PER_VEC - (8 / CHAR_SIZE)), %edx
+	kmovd %k1, %eax
+	testl %eax, %eax
+-	jnz L(return_vec_0)
+-	ret
+-
+-	.p2align 4
+-L(zero):
+-	xorl %eax, %eax
++	jnz L(return_vec_0_end)
+	ret
+ 
+-	.p2align 4
++	.p2align 4,, 8
+ L(between_16_31):
+	/* From 16 to 31 bytes.  No branch when size == 16.  */
+-	VMOVU (%rsi), %XMM2
+-	VPCMP $4, (%rdi), %XMM2, %k1
++
++	/* Use movups to save code size.  */
++	movups (%rsi), %xmm2
++	VPCMP $4, (%rdi), %xmm2, %k1
+	kmovd %k1, %eax
+	testl %eax, %eax
+-	jnz L(return_vec_0)
+-
++	jnz L(return_vec_0_lv)
+	/* Use overlapping loads to avoid branches.  */
+-
+-	VMOVU -16(%rsi, %rdx, CHAR_SIZE), %XMM2
+-	leaq -16(%rdi, %rdx, CHAR_SIZE), %rdi
+-	leaq -16(%rsi, %rdx, CHAR_SIZE), %rsi
+-	VPCMP $4, (%rdi), %XMM2, %k1
++	movups -16(%rsi, %rdx, CHAR_SIZE), %xmm2
++	VPCMP $4, -16(%rdi, %rdx, CHAR_SIZE), %xmm2, %k1
++	addl $(CHAR_PER_VEC - (16 / CHAR_SIZE)), %edx
+	kmovd %k1, %eax
+	testl %eax, %eax
+-	jnz L(return_vec_0)
+-	ret
+-
+-# ifdef USE_AS_WMEMCMP
+-	.p2align 4
+-L(one_or_less):
+-	jb L(zero)
+-	movl (%rdi), %ecx
+-	xorl %edx, %edx
+-	cmpl (%rsi), %ecx
+-	je L(zero)
+-	setg %dl
+-	leal -1(%rdx, %rdx), %eax
++	jnz L(return_vec_0_end)
+	ret
+-# else
+ 
+-	.p2align 4
+-L(between_4_7):
+-	/* Load as big endian with overlapping movbe to avoid branches.
+-	 */
+-	movbe (%rdi), %eax
+-	movbe (%rsi), %ecx
+-	shlq $32, %rax
+-	shlq $32, %rcx
+-	movbe -4(%rdi, %rdx), %edi
+-	movbe -4(%rsi, %rdx), %esi
+-	orq %rdi, %rax
+-	orq %rsi, %rcx
+-	subq %rcx, %rax
+-	jz L(zero_4_7)
+-	sbbl %eax, %eax
+-	orl $1, %eax
+-L(zero_4_7):
++# ifndef USE_AS_WMEMCMP
++L(between_2_3):
++	/* Load as big endian to avoid branches.  */
++	movzwl (%rdi), %eax
++	movzwl (%rsi), %ecx
++	shll $8, %eax
++	shll $8, %ecx
++	bswap %eax
++	bswap %ecx
++	movzbl -1(%rdi, %rdx), %edi
++	movzbl -1(%rsi, %rdx), %esi
++	orl %edi, %eax
++	orl %esi, %ecx
++	/* Subtraction is okay because the upper 8 bits are zero.  */
++	subl %ecx, %eax
+	ret
+ # endif
+-
+ END (MEMCMP)
+ #endif
diff --git a/SOURCES/glibc-upstream-2.34-176.patch b/SOURCES/glibc-upstream-2.34-176.patch
new file mode 100644
index 0000000..74b18ab
--- /dev/null
+++ b/SOURCES/glibc-upstream-2.34-176.patch
@@ -0,0 +1,497 @@
+commit 6d18a93dbbde2958001d65dff3080beed7ae675a
+Author: Noah Goldstein <goldstein.w.n@gmail.com>
+Date:   Mon Sep 20 16:20:15 2021 -0500
+
+    x86: Optimize memset-vec-unaligned-erms.S
+
+    No bug.
+
+    Optimizations are
+
+    1. change control flow for L(more_2x_vec) to fall through to loop and
+       jump for L(less_4x_vec) and L(less_8x_vec). This uses less code
+       size and saves jumps for length > 4x VEC_SIZE.
+
+    2. For EVEX/AVX512 move L(less_vec) closer to entry.
+
+    3. Avoid complex address mode for length > 2x VEC_SIZE
+
+    4. Slightly better aligning code for the loop from the perspective of
+       code size and uops.
+
+    5. Align targets so they make full use of their fetch block and if
+       possible cache line.
+
+    6. Try and reduce total number of icache lines that will need to be
+       pulled in for a given length.
+
+    7. Include "local" version of stosb target. For AVX2/EVEX/AVX512
+       jumping to the stosb target in the sse2 code section will almost
+       certainly be to a new page. The new version does increase code size
+       marginally by duplicating the target but should get better iTLB
+       behavior as a result.
+
+    test-memset, test-wmemset, and test-bzero are all passing.
+
+    Signed-off-by: Noah Goldstein <goldstein.w.n@gmail.com>
+    Reviewed-by: H.J. Lu <hjl.tools@gmail.com>
+    (cherry picked from commit e59ced238482fd71f3e493717f14f6507346741e)
+
+diff --git a/sysdeps/x86_64/memset.S b/sysdeps/x86_64/memset.S
+index 7d4a327eba29ecb4..0137eba4cdd9f830 100644
+--- a/sysdeps/x86_64/memset.S
++++ b/sysdeps/x86_64/memset.S
+@@ -18,13 +18,15 @@
+    <https://www.gnu.org/licenses/>.  */
+ 
+ #include <sysdep.h>
++#define USE_WITH_SSE2 1
+ 
+ #define VEC_SIZE 16
++#define MOV_SIZE 3
++#define RET_SIZE 1
++
+ #define VEC(i) xmm##i
+-/* Don't use movups and movaps since it will get larger nop paddings for
+-   alignment.  */
+-#define VMOVU movdqu
+-#define VMOVA movdqa
++#define VMOVU movups
++#define VMOVA movaps
+ 
+ #define MEMSET_VDUP_TO_VEC0_AND_SET_RETURN(d, r) \
+   movd d, %xmm0; \
+diff --git a/sysdeps/x86_64/multiarch/memset-avx2-unaligned-erms.S b/sysdeps/x86_64/multiarch/memset-avx2-unaligned-erms.S
+index ae0860f36a47d594..1af668af0aeda59e 100644
+--- a/sysdeps/x86_64/multiarch/memset-avx2-unaligned-erms.S
++++ b/sysdeps/x86_64/multiarch/memset-avx2-unaligned-erms.S
+@@ -1,8 +1,14 @@
+ #if IS_IN (libc)
++# define USE_WITH_AVX2 1
++
+ # define VEC_SIZE 32
++# define MOV_SIZE 4
++# define RET_SIZE 4
++
+ # define VEC(i) ymm##i
+-# define VMOVU vmovdqu
+-# define VMOVA vmovdqa
++
++# define VMOVU vmovdqu
++# define VMOVA vmovdqa
+ 
+ # define MEMSET_VDUP_TO_VEC0_AND_SET_RETURN(d, r) \
+   vmovd d, %xmm0; \
+diff --git a/sysdeps/x86_64/multiarch/memset-avx512-unaligned-erms.S b/sysdeps/x86_64/multiarch/memset-avx512-unaligned-erms.S
+index 8ad842fc2f140527..f14d6f8493c21a36 100644
+--- a/sysdeps/x86_64/multiarch/memset-avx512-unaligned-erms.S
++++ b/sysdeps/x86_64/multiarch/memset-avx512-unaligned-erms.S
+@@ -1,11 +1,18 @@
+ #if IS_IN (libc)
++# define USE_WITH_AVX512 1
++
+ # define VEC_SIZE 64
++# define MOV_SIZE 6
++# define RET_SIZE 1
++
+ # define XMM0 xmm16
+ # define YMM0 ymm16
+ # define VEC0 zmm16
+ # define VEC(i) VEC##i
+-# define VMOVU vmovdqu64
+-# define VMOVA vmovdqa64
++
++# define VMOVU vmovdqu64
++# define VMOVA vmovdqa64
++
+ # define VZEROUPPER
+ 
+ # define MEMSET_VDUP_TO_VEC0_AND_SET_RETURN(d, r) \
+diff --git a/sysdeps/x86_64/multiarch/memset-evex-unaligned-erms.S b/sysdeps/x86_64/multiarch/memset-evex-unaligned-erms.S
+index 640f092903302ad0..64b09e77cc20cc42 100644
+--- a/sysdeps/x86_64/multiarch/memset-evex-unaligned-erms.S
++++ b/sysdeps/x86_64/multiarch/memset-evex-unaligned-erms.S
+@@ -1,11 +1,18 @@
+ #if IS_IN (libc)
++# define USE_WITH_EVEX 1
++
+ # define VEC_SIZE 32
++# define MOV_SIZE 6
++# define RET_SIZE 1
++
+ # define XMM0 xmm16
+ # define YMM0 ymm16
+ # define VEC0 ymm16
+ # define VEC(i) VEC##i
+-# define VMOVU vmovdqu64
+-# define VMOVA vmovdqa64
++
++# define VMOVU vmovdqu64
++# define VMOVA vmovdqa64
++
+ # define VZEROUPPER
+ 
+ # define MEMSET_VDUP_TO_VEC0_AND_SET_RETURN(d, r) \
+diff --git a/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S b/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S
+index ff196844a093dc3b..e723413a664c088f 100644
+--- 
a/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S ++++ b/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S +@@ -63,8 +63,27 @@ + # endif + #endif + ++#if VEC_SIZE == 64 ++# define LOOP_4X_OFFSET (VEC_SIZE * 4) ++#else ++# define LOOP_4X_OFFSET (0) ++#endif ++ ++#if defined USE_WITH_EVEX || defined USE_WITH_AVX512 ++# define END_REG rcx ++# define LOOP_REG rdi ++#else ++# define END_REG rdi ++# define LOOP_REG rdx ++#endif ++ + #define PAGE_SIZE 4096 + ++/* Macro to calculate size of small memset block for aligning ++ purposes. */ ++#define SMALL_MEMSET_ALIGN(mov_sz, ret_sz) (2 * (mov_sz) + (ret_sz) + 1) ++ ++ + #ifndef SECTION + # error SECTION is not defined! + #endif +@@ -74,6 +93,7 @@ + ENTRY (__bzero) + mov %RDI_LP, %RAX_LP /* Set return value. */ + mov %RSI_LP, %RDX_LP /* Set n. */ ++ xorl %esi, %esi + pxor %XMM0, %XMM0 + jmp L(entry_from_bzero) + END (__bzero) +@@ -158,7 +178,7 @@ ENTRY_CHK (MEMSET_CHK_SYMBOL (__memset_chk, unaligned_erms)) + END_CHK (MEMSET_CHK_SYMBOL (__memset_chk, unaligned_erms)) + # endif + +-ENTRY (MEMSET_SYMBOL (__memset, unaligned_erms)) ++ENTRY_P2ALIGN (MEMSET_SYMBOL (__memset, unaligned_erms), 6) + MEMSET_VDUP_TO_VEC0_AND_SET_RETURN (%esi, %rdi) + # ifdef __ILP32__ + /* Clear the upper 32 bits. */ +@@ -168,75 +188,43 @@ ENTRY (MEMSET_SYMBOL (__memset, unaligned_erms)) + jb L(less_vec) + cmp $(VEC_SIZE * 2), %RDX_LP + ja L(stosb_more_2x_vec) +- /* From VEC and to 2 * VEC. No branch when size == VEC_SIZE. */ +- VMOVU %VEC(0), -VEC_SIZE(%rdi,%rdx) +- VMOVU %VEC(0), (%rdi) ++ /* From VEC and to 2 * VEC. No branch when size == VEC_SIZE. ++ */ ++ VMOVU %VEC(0), (%rax) ++ VMOVU %VEC(0), -VEC_SIZE(%rax, %rdx) + VZEROUPPER_RETURN +- +- .p2align 4 +-L(stosb_more_2x_vec): +- cmp __x86_rep_stosb_threshold(%rip), %RDX_LP +- ja L(stosb) +-#else +- .p2align 4 + #endif +-L(more_2x_vec): +- /* Stores to first 2x VEC before cmp as any path forward will +- require it. */ +- VMOVU %VEC(0), (%rdi) +- VMOVU %VEC(0), VEC_SIZE(%rdi) +- cmpq $(VEC_SIZE * 4), %rdx +- ja L(loop_start) +- VMOVU %VEC(0), -(VEC_SIZE * 2)(%rdi,%rdx) +- VMOVU %VEC(0), -VEC_SIZE(%rdi,%rdx) +-L(return): +-#if VEC_SIZE > 16 +- ZERO_UPPER_VEC_REGISTERS_RETURN ++ ++ .p2align 4,, 10 ++L(last_2x_vec): ++#ifdef USE_LESS_VEC_MASK_STORE ++ VMOVU %VEC(0), (VEC_SIZE * 2 + LOOP_4X_OFFSET)(%rcx) ++ VMOVU %VEC(0), (VEC_SIZE * 3 + LOOP_4X_OFFSET)(%rcx) + #else +- ret ++ VMOVU %VEC(0), (VEC_SIZE * -2)(%rdi) ++ VMOVU %VEC(0), (VEC_SIZE * -1)(%rdi) + #endif ++ VZEROUPPER_RETURN + +-L(loop_start): +- VMOVU %VEC(0), (VEC_SIZE * 2)(%rdi) +- VMOVU %VEC(0), (VEC_SIZE * 3)(%rdi) +- cmpq $(VEC_SIZE * 8), %rdx +- jbe L(loop_end) +- andq $-(VEC_SIZE * 2), %rdi +- subq $-(VEC_SIZE * 4), %rdi +- leaq -(VEC_SIZE * 4)(%rax, %rdx), %rcx +- .p2align 4 +-L(loop): +- VMOVA %VEC(0), (%rdi) +- VMOVA %VEC(0), VEC_SIZE(%rdi) +- VMOVA %VEC(0), (VEC_SIZE * 2)(%rdi) +- VMOVA %VEC(0), (VEC_SIZE * 3)(%rdi) +- subq $-(VEC_SIZE * 4), %rdi +- cmpq %rcx, %rdi +- jb L(loop) +-L(loop_end): +- /* NB: rax is set as ptr in MEMSET_VDUP_TO_VEC0_AND_SET_RETURN. +- rdx as length is also unchanged. */ +- VMOVU %VEC(0), -(VEC_SIZE * 4)(%rax, %rdx) +- VMOVU %VEC(0), -(VEC_SIZE * 3)(%rax, %rdx) +- VMOVU %VEC(0), -(VEC_SIZE * 2)(%rax, %rdx) +- VMOVU %VEC(0), -VEC_SIZE(%rax, %rdx) +- VZEROUPPER_SHORT_RETURN +- +- .p2align 4 ++ /* If have AVX512 mask instructions put L(less_vec) close to ++ entry as it doesn't take much space and is likely a hot target. ++ */ ++#ifdef USE_LESS_VEC_MASK_STORE ++ .p2align 4,, 10 + L(less_vec): + /* Less than 1 VEC. 
*/ + # if VEC_SIZE != 16 && VEC_SIZE != 32 && VEC_SIZE != 64 + # error Unsupported VEC_SIZE! + # endif +-# ifdef USE_LESS_VEC_MASK_STORE + /* Clear high bits from edi. Only keeping bits relevant to page + cross check. Note that we are using rax which is set in +- MEMSET_VDUP_TO_VEC0_AND_SET_RETURN as ptr from here on out. +- */ ++ MEMSET_VDUP_TO_VEC0_AND_SET_RETURN as ptr from here on out. */ + andl $(PAGE_SIZE - 1), %edi +- /* Check if VEC_SIZE store cross page. Mask stores suffer serious +- performance degradation when it has to fault supress. */ ++ /* Check if VEC_SIZE store cross page. Mask stores suffer ++ serious performance degradation when it has to fault supress. ++ */ + cmpl $(PAGE_SIZE - VEC_SIZE), %edi ++ /* This is generally considered a cold target. */ + ja L(cross_page) + # if VEC_SIZE > 32 + movq $-1, %rcx +@@ -247,58 +235,185 @@ L(less_vec): + bzhil %edx, %ecx, %ecx + kmovd %ecx, %k1 + # endif +- vmovdqu8 %VEC(0), (%rax) {%k1} ++ vmovdqu8 %VEC(0), (%rax){%k1} + VZEROUPPER_RETURN + ++# if defined USE_MULTIARCH && IS_IN (libc) ++ /* Include L(stosb_local) here if including L(less_vec) between ++ L(stosb_more_2x_vec) and ENTRY. This is to cache align the ++ L(stosb_more_2x_vec) target. */ ++ .p2align 4,, 10 ++L(stosb_local): ++ movzbl %sil, %eax ++ mov %RDX_LP, %RCX_LP ++ mov %RDI_LP, %RDX_LP ++ rep stosb ++ mov %RDX_LP, %RAX_LP ++ VZEROUPPER_RETURN ++# endif ++#endif ++ ++#if defined USE_MULTIARCH && IS_IN (libc) + .p2align 4 +-L(cross_page): ++L(stosb_more_2x_vec): ++ cmp __x86_rep_stosb_threshold(%rip), %RDX_LP ++ ja L(stosb_local) ++#endif ++ /* Fallthrough goes to L(loop_4x_vec). Tests for memset (2x, 4x] ++ and (4x, 8x] jump to target. */ ++L(more_2x_vec): ++ ++ /* Two different methods of setting up pointers / compare. The ++ two methods are based on the fact that EVEX/AVX512 mov ++ instructions take more bytes then AVX2/SSE2 mov instructions. As ++ well that EVEX/AVX512 machines also have fast LEA_BID. Both ++ setup and END_REG to avoid complex address mode. For EVEX/AVX512 ++ this saves code size and keeps a few targets in one fetch block. ++ For AVX2/SSE2 this helps prevent AGU bottlenecks. */ ++#if defined USE_WITH_EVEX || defined USE_WITH_AVX512 ++ /* If EVEX/AVX512 compute END_REG - (VEC_SIZE * 4 + ++ LOOP_4X_OFFSET) with LEA_BID. */ ++ ++ /* END_REG is rcx for EVEX/AVX512. */ ++ leaq -(VEC_SIZE * 4 + LOOP_4X_OFFSET)(%rdi, %rdx), %END_REG ++#endif ++ ++ /* Stores to first 2x VEC before cmp as any path forward will ++ require it. */ ++ VMOVU %VEC(0), (%rax) ++ VMOVU %VEC(0), VEC_SIZE(%rax) ++ ++ ++#if !(defined USE_WITH_EVEX || defined USE_WITH_AVX512) ++ /* If AVX2/SSE2 compute END_REG (rdi) with ALU. */ ++ addq %rdx, %END_REG ++#endif ++ ++ cmpq $(VEC_SIZE * 4), %rdx ++ jbe L(last_2x_vec) ++ ++ /* Store next 2x vec regardless. */ ++ VMOVU %VEC(0), (VEC_SIZE * 2)(%rax) ++ VMOVU %VEC(0), (VEC_SIZE * 3)(%rax) ++ ++ ++#if defined USE_WITH_EVEX || defined USE_WITH_AVX512 ++ /* If LOOP_4X_OFFSET don't readjust LOOP_REG (rdi), just add ++ extra offset to addresses in loop. Used for AVX512 to save space ++ as no way to get (VEC_SIZE * 4) in imm8. */ ++# if LOOP_4X_OFFSET == 0 ++ subq $-(VEC_SIZE * 4), %LOOP_REG + # endif +-# if VEC_SIZE > 32 +- cmpb $32, %dl +- jae L(between_32_63) ++ /* Avoid imm32 compare here to save code size. */ ++ cmpq %rdi, %rcx ++#else ++ addq $-(VEC_SIZE * 4), %END_REG ++ cmpq $(VEC_SIZE * 8), %rdx ++#endif ++ jbe L(last_4x_vec) ++#if !(defined USE_WITH_EVEX || defined USE_WITH_AVX512) ++ /* Set LOOP_REG (rdx). 
*/ ++ leaq (VEC_SIZE * 4)(%rax), %LOOP_REG ++#endif ++ /* Align dst for loop. */ ++ andq $(VEC_SIZE * -2), %LOOP_REG ++ .p2align 4 ++L(loop): ++ VMOVA %VEC(0), LOOP_4X_OFFSET(%LOOP_REG) ++ VMOVA %VEC(0), (VEC_SIZE + LOOP_4X_OFFSET)(%LOOP_REG) ++ VMOVA %VEC(0), (VEC_SIZE * 2 + LOOP_4X_OFFSET)(%LOOP_REG) ++ VMOVA %VEC(0), (VEC_SIZE * 3 + LOOP_4X_OFFSET)(%LOOP_REG) ++ subq $-(VEC_SIZE * 4), %LOOP_REG ++ cmpq %END_REG, %LOOP_REG ++ jb L(loop) ++ .p2align 4,, MOV_SIZE ++L(last_4x_vec): ++ VMOVU %VEC(0), LOOP_4X_OFFSET(%END_REG) ++ VMOVU %VEC(0), (VEC_SIZE + LOOP_4X_OFFSET)(%END_REG) ++ VMOVU %VEC(0), (VEC_SIZE * 2 + LOOP_4X_OFFSET)(%END_REG) ++ VMOVU %VEC(0), (VEC_SIZE * 3 + LOOP_4X_OFFSET)(%END_REG) ++L(return): ++#if VEC_SIZE > 16 ++ ZERO_UPPER_VEC_REGISTERS_RETURN ++#else ++ ret ++#endif ++ ++ .p2align 4,, 10 ++#ifndef USE_LESS_VEC_MASK_STORE ++# if defined USE_MULTIARCH && IS_IN (libc) ++ /* If no USE_LESS_VEC_MASK put L(stosb_local) here. Will be in ++ range for 2-byte jump encoding. */ ++L(stosb_local): ++ movzbl %sil, %eax ++ mov %RDX_LP, %RCX_LP ++ mov %RDI_LP, %RDX_LP ++ rep stosb ++ mov %RDX_LP, %RAX_LP ++ VZEROUPPER_RETURN + # endif +-# if VEC_SIZE > 16 +- cmpb $16, %dl ++ /* Define L(less_vec) only if not otherwise defined. */ ++ .p2align 4 ++L(less_vec): ++#endif ++L(cross_page): ++#if VEC_SIZE > 32 ++ cmpl $32, %edx ++ jae L(between_32_63) ++#endif ++#if VEC_SIZE > 16 ++ cmpl $16, %edx + jae L(between_16_31) +-# endif +- MOVQ %XMM0, %rcx +- cmpb $8, %dl ++#endif ++ MOVQ %XMM0, %rdi ++ cmpl $8, %edx + jae L(between_8_15) +- cmpb $4, %dl ++ cmpl $4, %edx + jae L(between_4_7) +- cmpb $1, %dl ++ cmpl $1, %edx + ja L(between_2_3) +- jb 1f +- movb %cl, (%rax) +-1: ++ jb L(return) ++ movb %sil, (%rax) + VZEROUPPER_RETURN +-# if VEC_SIZE > 32 ++ ++ /* Align small targets only if not doing so would cross a fetch ++ line. */ ++#if VEC_SIZE > 32 ++ .p2align 4,, SMALL_MEMSET_ALIGN(MOV_SIZE, RET_SIZE) + /* From 32 to 63. No branch when size == 32. */ + L(between_32_63): +- VMOVU %YMM0, -32(%rax,%rdx) + VMOVU %YMM0, (%rax) ++ VMOVU %YMM0, -32(%rax, %rdx) + VZEROUPPER_RETURN +-# endif +-# if VEC_SIZE > 16 +- /* From 16 to 31. No branch when size == 16. */ ++#endif ++ ++#if VEC_SIZE >= 32 ++ .p2align 4,, SMALL_MEMSET_ALIGN(MOV_SIZE, RET_SIZE) + L(between_16_31): +- VMOVU %XMM0, -16(%rax,%rdx) ++ /* From 16 to 31. No branch when size == 16. */ + VMOVU %XMM0, (%rax) ++ VMOVU %XMM0, -16(%rax, %rdx) + VZEROUPPER_RETURN +-# endif +- /* From 8 to 15. No branch when size == 8. */ ++#endif ++ ++ .p2align 4,, SMALL_MEMSET_ALIGN(3, RET_SIZE) + L(between_8_15): +- movq %rcx, -8(%rax,%rdx) +- movq %rcx, (%rax) ++ /* From 8 to 15. No branch when size == 8. */ ++ movq %rdi, (%rax) ++ movq %rdi, -8(%rax, %rdx) + VZEROUPPER_RETURN ++ ++ .p2align 4,, SMALL_MEMSET_ALIGN(2, RET_SIZE) + L(between_4_7): + /* From 4 to 7. No branch when size == 4. */ +- movl %ecx, -4(%rax,%rdx) +- movl %ecx, (%rax) ++ movl %edi, (%rax) ++ movl %edi, -4(%rax, %rdx) + VZEROUPPER_RETURN ++ ++ .p2align 4,, SMALL_MEMSET_ALIGN(3, RET_SIZE) + L(between_2_3): + /* From 2 to 3. No branch when size == 2. 
*/
+-	movw %cx, -2(%rax,%rdx)
+-	movw %cx, (%rax)
++	movw %di, (%rax)
++	movb %dil, -1(%rax, %rdx)
+	VZEROUPPER_RETURN
+ END (MEMSET_SYMBOL (__memset, unaligned_erms))
diff --git a/SOURCES/glibc-upstream-2.34-177.patch b/SOURCES/glibc-upstream-2.34-177.patch
new file mode 100644
index 0000000..112bcad
--- /dev/null
+++ b/SOURCES/glibc-upstream-2.34-177.patch
@@ -0,0 +1,40 @@
+commit baf3ece63453adac59c5688930324a78ced5b2e4
+Author: Noah Goldstein <goldstein.w.n@gmail.com>
+Date:   Sat Oct 23 01:26:47 2021 -0400
+
+    x86: Replace sse2 instructions with avx in memcmp-evex-movbe.S
+
+    This commit replaces two usages of SSE2 'movups' with AVX 'vmovdqu'.
+
+    It could potentially be dangerous to use SSE2 if this function is ever
+    called without using 'vzeroupper' beforehand. While compilers appear
+    to use 'vzeroupper' before function calls if AVX2 has been used, using
+    SSE2 here is more brittle. Since it is not absolutely necessary it
+    should be avoided.
+
+    It costs 2-extra bytes but the extra bytes should only eat into
+    alignment padding.
+    Reviewed-by: H.J. Lu <hjl.tools@gmail.com>
+
+    (cherry picked from commit bad852b61b79503fcb3c5fc379c70f768df3e1fb)
+
+diff --git a/sysdeps/x86_64/multiarch/memcmp-evex-movbe.S b/sysdeps/x86_64/multiarch/memcmp-evex-movbe.S
+index 2761b54f2e7dea9f..640f6757fac8a356 100644
+--- a/sysdeps/x86_64/multiarch/memcmp-evex-movbe.S
++++ b/sysdeps/x86_64/multiarch/memcmp-evex-movbe.S
+@@ -561,13 +561,13 @@ L(between_16_31):
+	/* From 16 to 31 bytes.  No branch when size == 16.  */
+ 
+	/* Use movups to save code size.  */
+-	movups (%rsi), %xmm2
++	vmovdqu (%rsi), %xmm2
+	VPCMP $4, (%rdi), %xmm2, %k1
+	kmovd %k1, %eax
+	testl %eax, %eax
+	jnz L(return_vec_0_lv)
+	/* Use overlapping loads to avoid branches.  */
+-	movups -16(%rsi, %rdx, CHAR_SIZE), %xmm2
++	vmovdqu -16(%rsi, %rdx, CHAR_SIZE), %xmm2
+	VPCMP $4, -16(%rdi, %rdx, CHAR_SIZE), %xmm2, %k1
+	addl $(CHAR_PER_VEC - (16 / CHAR_SIZE)), %edx
+	kmovd %k1, %eax
diff --git a/SOURCES/glibc-upstream-2.34-178.patch b/SOURCES/glibc-upstream-2.34-178.patch
new file mode 100644
index 0000000..1540e2f
--- /dev/null
+++ b/SOURCES/glibc-upstream-2.34-178.patch
@@ -0,0 +1,690 @@
+commit f35ad30da4880a1574996df0674986ecf82fa7ae
+Author: H.J. Lu <hjl.tools@gmail.com>
+Date:   Fri Oct 29 12:40:20 2021 -0700
+
+    x86-64: Improve EVEX strcmp with masked load
+
+    In strcmp-evex.S, to compare 2 32-byte strings, replace
+
+            VMOVU   (%rdi, %rdx), %YMM0
+            VMOVU   (%rsi, %rdx), %YMM1
+            /* Each bit in K0 represents a mismatch in YMM0 and YMM1.  */
+            VPCMP   $4, %YMM0, %YMM1, %k0
+            VPCMP   $0, %YMMZERO, %YMM0, %k1
+            VPCMP   $0, %YMMZERO, %YMM1, %k2
+            /* Each bit in K1 represents a NULL in YMM0 or YMM1.  */
+            kord    %k1, %k2, %k1
+            /* Each bit in K1 represents a NULL or a mismatch.  */
+            kord    %k0, %k1, %k1
+            kmovd   %k1, %ecx
+            testl   %ecx, %ecx
+            jne     L(last_vector)
+
+    with
+
+            VMOVU   (%rdi, %rdx), %YMM0
+            VPTESTM %YMM0, %YMM0, %k2
+            /* Each bit cleared in K1 represents a mismatch or a null CHAR
+               in YMM0 and 32 bytes at (%rsi, %rdx).  */
+            VPCMP   $0, (%rsi, %rdx), %YMM0, %k1{%k2}
+            kmovd   %k1, %ecx
+            incl    %ecx
+            jne     L(last_vector)
+
+    It makes EVEX strcmp faster than AVX2 strcmp by up to 40% on Tiger Lake
+    and Ice Lake.
+
+    Co-Authored-By: Noah Goldstein <goldstein.w.n@gmail.com>
+    (cherry picked from commit c46e9afb2df5fc9e39ff4d13777e4b4c26e04e55)
+
+diff --git a/sysdeps/x86_64/multiarch/strcmp-evex.S b/sysdeps/x86_64/multiarch/strcmp-evex.S
+index d5aa6daa46c7ed25..82f12ac89bcae20b 100644
+--- a/sysdeps/x86_64/multiarch/strcmp-evex.S
++++ b/sysdeps/x86_64/multiarch/strcmp-evex.S
+@@ -41,6 +41,8 @@
+ # ifdef USE_AS_WCSCMP
+	/* Compare packed dwords. 
*/ + # define VPCMP vpcmpd ++# define VPMINU vpminud ++# define VPTESTM vptestmd + # define SHIFT_REG32 r8d + # define SHIFT_REG64 r8 + /* 1 dword char == 4 bytes. */ +@@ -48,6 +50,8 @@ + # else + /* Compare packed bytes. */ + # define VPCMP vpcmpb ++# define VPMINU vpminub ++# define VPTESTM vptestmb + # define SHIFT_REG32 ecx + # define SHIFT_REG64 rcx + /* 1 byte char == 1 byte. */ +@@ -67,6 +71,9 @@ + # define YMM5 ymm22 + # define YMM6 ymm23 + # define YMM7 ymm24 ++# define YMM8 ymm25 ++# define YMM9 ymm26 ++# define YMM10 ymm27 + + /* Warning! + wcscmp/wcsncmp have to use SIGNED comparison for elements. +@@ -76,7 +83,7 @@ + /* The main idea of the string comparison (byte or dword) using 256-bit + EVEX instructions consists of comparing (VPCMP) two ymm vectors. The + latter can be on either packed bytes or dwords depending on +- USE_AS_WCSCMP. In order to check the null char, algorithm keeps the ++ USE_AS_WCSCMP. In order to check the null CHAR, algorithm keeps the + matched bytes/dwords, requiring 5 EVEX instructions (3 VPCMP and 2 + KORD). In general, the costs of comparing VEC_SIZE bytes (32-bytes) + are 3 VPCMP and 2 KORD instructions, together with VMOVU and ktestd +@@ -123,27 +130,21 @@ ENTRY (STRCMP) + jg L(cross_page) + /* Start comparing 4 vectors. */ + VMOVU (%rdi), %YMM0 +- VMOVU (%rsi), %YMM1 + +- /* Each bit in K0 represents a mismatch in YMM0 and YMM1. */ +- VPCMP $4, %YMM0, %YMM1, %k0 ++ /* Each bit set in K2 represents a non-null CHAR in YMM0. */ ++ VPTESTM %YMM0, %YMM0, %k2 + +- /* Check for NULL in YMM0. */ +- VPCMP $0, %YMMZERO, %YMM0, %k1 +- /* Check for NULL in YMM1. */ +- VPCMP $0, %YMMZERO, %YMM1, %k2 +- /* Each bit in K1 represents a NULL in YMM0 or YMM1. */ +- kord %k1, %k2, %k1 ++ /* Each bit cleared in K1 represents a mismatch or a null CHAR ++ in YMM0 and 32 bytes at (%rsi). */ ++ VPCMP $0, (%rsi), %YMM0, %k1{%k2} + +- /* Each bit in K1 represents: +- 1. A mismatch in YMM0 and YMM1. Or +- 2. A NULL in YMM0 or YMM1. +- */ +- kord %k0, %k1, %k1 +- +- ktestd %k1, %k1 +- je L(next_3_vectors) + kmovd %k1, %ecx ++# ifdef USE_AS_WCSCMP ++ subl $0xff, %ecx ++# else ++ incl %ecx ++# endif ++ je L(next_3_vectors) + tzcntl %ecx, %edx + # ifdef USE_AS_WCSCMP + /* NB: Multiply wchar_t count by 4 to get the number of bytes. */ +@@ -172,9 +173,7 @@ L(return): + # endif + ret + +- .p2align 4 + L(return_vec_size): +- kmovd %k1, %ecx + tzcntl %ecx, %edx + # ifdef USE_AS_WCSCMP + /* NB: Multiply wchar_t count by 4 to get the number of bytes. */ +@@ -210,9 +209,7 @@ L(return_vec_size): + # endif + ret + +- .p2align 4 + L(return_2_vec_size): +- kmovd %k1, %ecx + tzcntl %ecx, %edx + # ifdef USE_AS_WCSCMP + /* NB: Multiply wchar_t count by 4 to get the number of bytes. */ +@@ -248,9 +245,7 @@ L(return_2_vec_size): + # endif + ret + +- .p2align 4 + L(return_3_vec_size): +- kmovd %k1, %ecx + tzcntl %ecx, %edx + # ifdef USE_AS_WCSCMP + /* NB: Multiply wchar_t count by 4 to get the number of bytes. */ +@@ -289,43 +284,45 @@ L(return_3_vec_size): + .p2align 4 + L(next_3_vectors): + VMOVU VEC_SIZE(%rdi), %YMM0 +- VMOVU VEC_SIZE(%rsi), %YMM1 +- /* Each bit in K0 represents a mismatch in YMM0 and YMM1. */ +- VPCMP $4, %YMM0, %YMM1, %k0 +- VPCMP $0, %YMMZERO, %YMM0, %k1 +- VPCMP $0, %YMMZERO, %YMM1, %k2 +- /* Each bit in K1 represents a NULL in YMM0 or YMM1. */ +- kord %k1, %k2, %k1 +- /* Each bit in K1 represents a NULL or a mismatch. */ +- kord %k0, %k1, %k1 +- ktestd %k1, %k1 ++ /* Each bit set in K2 represents a non-null CHAR in YMM0. 
*/ ++ VPTESTM %YMM0, %YMM0, %k2 ++ /* Each bit cleared in K1 represents a mismatch or a null CHAR ++ in YMM0 and 32 bytes at VEC_SIZE(%rsi). */ ++ VPCMP $0, VEC_SIZE(%rsi), %YMM0, %k1{%k2} ++ kmovd %k1, %ecx ++# ifdef USE_AS_WCSCMP ++ subl $0xff, %ecx ++# else ++ incl %ecx ++# endif + jne L(return_vec_size) + +- VMOVU (VEC_SIZE * 2)(%rdi), %YMM2 +- VMOVU (VEC_SIZE * 3)(%rdi), %YMM3 +- VMOVU (VEC_SIZE * 2)(%rsi), %YMM4 +- VMOVU (VEC_SIZE * 3)(%rsi), %YMM5 +- +- /* Each bit in K0 represents a mismatch in YMM2 and YMM4. */ +- VPCMP $4, %YMM2, %YMM4, %k0 +- VPCMP $0, %YMMZERO, %YMM2, %k1 +- VPCMP $0, %YMMZERO, %YMM4, %k2 +- /* Each bit in K1 represents a NULL in YMM2 or YMM4. */ +- kord %k1, %k2, %k1 +- /* Each bit in K1 represents a NULL or a mismatch. */ +- kord %k0, %k1, %k1 +- ktestd %k1, %k1 ++ VMOVU (VEC_SIZE * 2)(%rdi), %YMM0 ++ /* Each bit set in K2 represents a non-null CHAR in YMM0. */ ++ VPTESTM %YMM0, %YMM0, %k2 ++ /* Each bit cleared in K1 represents a mismatch or a null CHAR ++ in YMM0 and 32 bytes at (VEC_SIZE * 2)(%rsi). */ ++ VPCMP $0, (VEC_SIZE * 2)(%rsi), %YMM0, %k1{%k2} ++ kmovd %k1, %ecx ++# ifdef USE_AS_WCSCMP ++ subl $0xff, %ecx ++# else ++ incl %ecx ++# endif + jne L(return_2_vec_size) + +- /* Each bit in K0 represents a mismatch in YMM3 and YMM5. */ +- VPCMP $4, %YMM3, %YMM5, %k0 +- VPCMP $0, %YMMZERO, %YMM3, %k1 +- VPCMP $0, %YMMZERO, %YMM5, %k2 +- /* Each bit in K1 represents a NULL in YMM3 or YMM5. */ +- kord %k1, %k2, %k1 +- /* Each bit in K1 represents a NULL or a mismatch. */ +- kord %k0, %k1, %k1 +- ktestd %k1, %k1 ++ VMOVU (VEC_SIZE * 3)(%rdi), %YMM0 ++ /* Each bit set in K2 represents a non-null CHAR in YMM0. */ ++ VPTESTM %YMM0, %YMM0, %k2 ++ /* Each bit cleared in K1 represents a mismatch or a null CHAR ++ in YMM0 and 32 bytes at (VEC_SIZE * 2)(%rsi). */ ++ VPCMP $0, (VEC_SIZE * 3)(%rsi), %YMM0, %k1{%k2} ++ kmovd %k1, %ecx ++# ifdef USE_AS_WCSCMP ++ subl $0xff, %ecx ++# else ++ incl %ecx ++# endif + jne L(return_3_vec_size) + L(main_loop_header): + leaq (VEC_SIZE * 4)(%rdi), %rdx +@@ -375,56 +372,51 @@ L(back_to_loop): + VMOVA VEC_SIZE(%rax), %YMM2 + VMOVA (VEC_SIZE * 2)(%rax), %YMM4 + VMOVA (VEC_SIZE * 3)(%rax), %YMM6 +- VMOVU (%rdx), %YMM1 +- VMOVU VEC_SIZE(%rdx), %YMM3 +- VMOVU (VEC_SIZE * 2)(%rdx), %YMM5 +- VMOVU (VEC_SIZE * 3)(%rdx), %YMM7 +- +- VPCMP $4, %YMM0, %YMM1, %k0 +- VPCMP $0, %YMMZERO, %YMM0, %k1 +- VPCMP $0, %YMMZERO, %YMM1, %k2 +- kord %k1, %k2, %k1 +- /* Each bit in K4 represents a NULL or a mismatch in YMM0 and +- YMM1. */ +- kord %k0, %k1, %k4 +- +- VPCMP $4, %YMM2, %YMM3, %k0 +- VPCMP $0, %YMMZERO, %YMM2, %k1 +- VPCMP $0, %YMMZERO, %YMM3, %k2 +- kord %k1, %k2, %k1 +- /* Each bit in K5 represents a NULL or a mismatch in YMM2 and +- YMM3. */ +- kord %k0, %k1, %k5 +- +- VPCMP $4, %YMM4, %YMM5, %k0 +- VPCMP $0, %YMMZERO, %YMM4, %k1 +- VPCMP $0, %YMMZERO, %YMM5, %k2 +- kord %k1, %k2, %k1 +- /* Each bit in K6 represents a NULL or a mismatch in YMM4 and +- YMM5. */ +- kord %k0, %k1, %k6 +- +- VPCMP $4, %YMM6, %YMM7, %k0 +- VPCMP $0, %YMMZERO, %YMM6, %k1 +- VPCMP $0, %YMMZERO, %YMM7, %k2 +- kord %k1, %k2, %k1 +- /* Each bit in K7 represents a NULL or a mismatch in YMM6 and +- YMM7. */ +- kord %k0, %k1, %k7 +- +- kord %k4, %k5, %k0 +- kord %k6, %k7, %k1 +- +- /* Test each mask (32 bits) individually because for VEC_SIZE +- == 32 is not possible to OR the four masks and keep all bits +- in a 64-bit integer register, differing from SSE2 strcmp +- where ORing is possible. 
*/ +- kortestd %k0, %k1 +- je L(loop) +- ktestd %k4, %k4 ++ ++ VPMINU %YMM0, %YMM2, %YMM8 ++ VPMINU %YMM4, %YMM6, %YMM9 ++ ++ /* A zero CHAR in YMM8 means that there is a null CHAR. */ ++ VPMINU %YMM8, %YMM9, %YMM8 ++ ++ /* Each bit set in K1 represents a non-null CHAR in YMM8. */ ++ VPTESTM %YMM8, %YMM8, %k1 ++ ++ /* (YMM ^ YMM): A non-zero CHAR represents a mismatch. */ ++ vpxorq (%rdx), %YMM0, %YMM1 ++ vpxorq VEC_SIZE(%rdx), %YMM2, %YMM3 ++ vpxorq (VEC_SIZE * 2)(%rdx), %YMM4, %YMM5 ++ vpxorq (VEC_SIZE * 3)(%rdx), %YMM6, %YMM7 ++ ++ vporq %YMM1, %YMM3, %YMM9 ++ vporq %YMM5, %YMM7, %YMM10 ++ ++ /* A non-zero CHAR in YMM9 represents a mismatch. */ ++ vporq %YMM9, %YMM10, %YMM9 ++ ++ /* Each bit cleared in K0 represents a mismatch or a null CHAR. */ ++ VPCMP $0, %YMMZERO, %YMM9, %k0{%k1} ++ kmovd %k0, %ecx ++# ifdef USE_AS_WCSCMP ++ subl $0xff, %ecx ++# else ++ incl %ecx ++# endif ++ je L(loop) ++ ++ /* Each bit set in K1 represents a non-null CHAR in YMM0. */ ++ VPTESTM %YMM0, %YMM0, %k1 ++ /* Each bit cleared in K0 represents a mismatch or a null CHAR ++ in YMM0 and (%rdx). */ ++ VPCMP $0, %YMMZERO, %YMM1, %k0{%k1} ++ kmovd %k0, %ecx ++# ifdef USE_AS_WCSCMP ++ subl $0xff, %ecx ++# else ++ incl %ecx ++# endif + je L(test_vec) +- kmovd %k4, %edi +- tzcntl %edi, %ecx ++ tzcntl %ecx, %ecx + # ifdef USE_AS_WCSCMP + /* NB: Multiply wchar_t count by 4 to get the number of bytes. */ + sall $2, %ecx +@@ -466,9 +458,18 @@ L(test_vec): + cmpq $VEC_SIZE, %r11 + jbe L(zero) + # endif +- ktestd %k5, %k5 ++ /* Each bit set in K1 represents a non-null CHAR in YMM2. */ ++ VPTESTM %YMM2, %YMM2, %k1 ++ /* Each bit cleared in K0 represents a mismatch or a null CHAR ++ in YMM2 and VEC_SIZE(%rdx). */ ++ VPCMP $0, %YMMZERO, %YMM3, %k0{%k1} ++ kmovd %k0, %ecx ++# ifdef USE_AS_WCSCMP ++ subl $0xff, %ecx ++# else ++ incl %ecx ++# endif + je L(test_2_vec) +- kmovd %k5, %ecx + tzcntl %ecx, %edi + # ifdef USE_AS_WCSCMP + /* NB: Multiply wchar_t count by 4 to get the number of bytes. */ +@@ -512,9 +513,18 @@ L(test_2_vec): + cmpq $(VEC_SIZE * 2), %r11 + jbe L(zero) + # endif +- ktestd %k6, %k6 ++ /* Each bit set in K1 represents a non-null CHAR in YMM4. */ ++ VPTESTM %YMM4, %YMM4, %k1 ++ /* Each bit cleared in K0 represents a mismatch or a null CHAR ++ in YMM4 and (VEC_SIZE * 2)(%rdx). */ ++ VPCMP $0, %YMMZERO, %YMM5, %k0{%k1} ++ kmovd %k0, %ecx ++# ifdef USE_AS_WCSCMP ++ subl $0xff, %ecx ++# else ++ incl %ecx ++# endif + je L(test_3_vec) +- kmovd %k6, %ecx + tzcntl %ecx, %edi + # ifdef USE_AS_WCSCMP + /* NB: Multiply wchar_t count by 4 to get the number of bytes. */ +@@ -558,8 +568,18 @@ L(test_3_vec): + cmpq $(VEC_SIZE * 3), %r11 + jbe L(zero) + # endif +- kmovd %k7, %esi +- tzcntl %esi, %ecx ++ /* Each bit set in K1 represents a non-null CHAR in YMM6. */ ++ VPTESTM %YMM6, %YMM6, %k1 ++ /* Each bit cleared in K0 represents a mismatch or a null CHAR ++ in YMM6 and (VEC_SIZE * 3)(%rdx). */ ++ VPCMP $0, %YMMZERO, %YMM7, %k0{%k1} ++ kmovd %k0, %ecx ++# ifdef USE_AS_WCSCMP ++ subl $0xff, %ecx ++# else ++ incl %ecx ++# endif ++ tzcntl %ecx, %ecx + # ifdef USE_AS_WCSCMP + /* NB: Multiply wchar_t count by 4 to get the number of bytes. */ + sall $2, %ecx +@@ -615,39 +635,51 @@ L(loop_cross_page): + + VMOVU (%rax, %r10), %YMM2 + VMOVU VEC_SIZE(%rax, %r10), %YMM3 +- VMOVU (%rdx, %r10), %YMM4 +- VMOVU VEC_SIZE(%rdx, %r10), %YMM5 +- +- VPCMP $4, %YMM4, %YMM2, %k0 +- VPCMP $0, %YMMZERO, %YMM2, %k1 +- VPCMP $0, %YMMZERO, %YMM4, %k2 +- kord %k1, %k2, %k1 +- /* Each bit in K1 represents a NULL or a mismatch in YMM2 and +- YMM4. 
*/ +- kord %k0, %k1, %k1 +- +- VPCMP $4, %YMM5, %YMM3, %k3 +- VPCMP $0, %YMMZERO, %YMM3, %k4 +- VPCMP $0, %YMMZERO, %YMM5, %k5 +- kord %k4, %k5, %k4 +- /* Each bit in K3 represents a NULL or a mismatch in YMM3 and +- YMM5. */ +- kord %k3, %k4, %k3 ++ ++ /* Each bit set in K2 represents a non-null CHAR in YMM2. */ ++ VPTESTM %YMM2, %YMM2, %k2 ++ /* Each bit cleared in K1 represents a mismatch or a null CHAR ++ in YMM2 and 32 bytes at (%rdx, %r10). */ ++ VPCMP $0, (%rdx, %r10), %YMM2, %k1{%k2} ++ kmovd %k1, %r9d ++ /* Don't use subl since it is the lower 16/32 bits of RDI ++ below. */ ++ notl %r9d ++# ifdef USE_AS_WCSCMP ++ /* Only last 8 bits are valid. */ ++ andl $0xff, %r9d ++# endif ++ ++ /* Each bit set in K4 represents a non-null CHAR in YMM3. */ ++ VPTESTM %YMM3, %YMM3, %k4 ++ /* Each bit cleared in K3 represents a mismatch or a null CHAR ++ in YMM3 and 32 bytes at VEC_SIZE(%rdx, %r10). */ ++ VPCMP $0, VEC_SIZE(%rdx, %r10), %YMM3, %k3{%k4} ++ kmovd %k3, %edi ++# ifdef USE_AS_WCSCMP ++ /* Don't use subl since it is the upper 8 bits of EDI below. */ ++ notl %edi ++ andl $0xff, %edi ++# else ++ incl %edi ++# endif + + # ifdef USE_AS_WCSCMP +- /* NB: Each bit in K1/K3 represents 4-byte element. */ +- kshiftlw $8, %k3, %k2 ++ /* NB: Each bit in EDI/R9D represents 4-byte element. */ ++ sall $8, %edi + /* NB: Divide shift count by 4 since each bit in K1 represent 4 + bytes. */ + movl %ecx, %SHIFT_REG32 + sarl $2, %SHIFT_REG32 ++ ++ /* Each bit in EDI represents a null CHAR or a mismatch. */ ++ orl %r9d, %edi + # else +- kshiftlq $32, %k3, %k2 +-# endif ++ salq $32, %rdi + +- /* Each bit in K1 represents a NULL or a mismatch. */ +- korq %k1, %k2, %k1 +- kmovq %k1, %rdi ++ /* Each bit in RDI represents a null CHAR or a mismatch. */ ++ orq %r9, %rdi ++# endif + + /* Since ECX < VEC_SIZE * 2, simply skip the first ECX bytes. */ + shrxq %SHIFT_REG64, %rdi, %rdi +@@ -692,35 +724,45 @@ L(loop_cross_page_2_vec): + /* The first VEC_SIZE * 2 bytes match or are ignored. */ + VMOVU (VEC_SIZE * 2)(%rax, %r10), %YMM0 + VMOVU (VEC_SIZE * 3)(%rax, %r10), %YMM1 +- VMOVU (VEC_SIZE * 2)(%rdx, %r10), %YMM2 +- VMOVU (VEC_SIZE * 3)(%rdx, %r10), %YMM3 +- +- VPCMP $4, %YMM0, %YMM2, %k0 +- VPCMP $0, %YMMZERO, %YMM0, %k1 +- VPCMP $0, %YMMZERO, %YMM2, %k2 +- kord %k1, %k2, %k1 +- /* Each bit in K1 represents a NULL or a mismatch in YMM0 and +- YMM2. */ +- kord %k0, %k1, %k1 +- +- VPCMP $4, %YMM1, %YMM3, %k3 +- VPCMP $0, %YMMZERO, %YMM1, %k4 +- VPCMP $0, %YMMZERO, %YMM3, %k5 +- kord %k4, %k5, %k4 +- /* Each bit in K3 represents a NULL or a mismatch in YMM1 and +- YMM3. */ +- kord %k3, %k4, %k3 + ++ VPTESTM %YMM0, %YMM0, %k2 ++ /* Each bit cleared in K1 represents a mismatch or a null CHAR ++ in YMM0 and 32 bytes at (VEC_SIZE * 2)(%rdx, %r10). */ ++ VPCMP $0, (VEC_SIZE * 2)(%rdx, %r10), %YMM0, %k1{%k2} ++ kmovd %k1, %r9d ++ /* Don't use subl since it is the lower 16/32 bits of RDI ++ below. */ ++ notl %r9d + # ifdef USE_AS_WCSCMP +- /* NB: Each bit in K1/K3 represents 4-byte element. */ +- kshiftlw $8, %k3, %k2 ++ /* Only last 8 bits are valid. */ ++ andl $0xff, %r9d ++# endif ++ ++ VPTESTM %YMM1, %YMM1, %k4 ++ /* Each bit cleared in K3 represents a mismatch or a null CHAR ++ in YMM1 and 32 bytes at (VEC_SIZE * 3)(%rdx, %r10). */ ++ VPCMP $0, (VEC_SIZE * 3)(%rdx, %r10), %YMM1, %k3{%k4} ++ kmovd %k3, %edi ++# ifdef USE_AS_WCSCMP ++ /* Don't use subl since it is the upper 8 bits of EDI below. 
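For the page-cross path the two per-vector masks are now combined in general-purpose registers instead of with KSHIFT/KOR on mask registers: each mask is inverted with NOT (so a set bit again means mismatch-or-null), the second mask is shifted into the upper half (SALL $8 for wcscmp, SALQ $32 for strcmp) and ORed in, and SHRX then discards the bytes before the page boundary. A C sketch of the strcmp flavor; names are illustrative:

#include <stdint.h>

/* Sketch: lo/hi are the inverted compare masks of the two 32-byte
   halves; skip is the byte count already compared before the page
   cross (the SHIFT_REG value).  */
static uint64_t
fold_cross_page_masks (uint32_t lo, uint32_t hi, unsigned skip)
{
  uint64_t m = ((uint64_t) hi << 32) | lo;  /* salq $32 + orq */
  return m >> skip;                         /* shrxq          */
}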
*/ ++ notl %edi ++ andl $0xff, %edi + # else +- kshiftlq $32, %k3, %k2 ++ incl %edi + # endif + +- /* Each bit in K1 represents a NULL or a mismatch. */ +- korq %k1, %k2, %k1 +- kmovq %k1, %rdi ++# ifdef USE_AS_WCSCMP ++ /* NB: Each bit in EDI/R9D represents 4-byte element. */ ++ sall $8, %edi ++ ++ /* Each bit in EDI represents a null CHAR or a mismatch. */ ++ orl %r9d, %edi ++# else ++ salq $32, %rdi ++ ++ /* Each bit in RDI represents a null CHAR or a mismatch. */ ++ orq %r9, %rdi ++# endif + + xorl %r8d, %r8d + /* If ECX > VEC_SIZE * 2, skip ECX - (VEC_SIZE * 2) bytes. */ +@@ -729,12 +771,15 @@ L(loop_cross_page_2_vec): + /* R8 has number of bytes skipped. */ + movl %ecx, %r8d + # ifdef USE_AS_WCSCMP +- /* NB: Divide shift count by 4 since each bit in K1 represent 4 ++ /* NB: Divide shift count by 4 since each bit in RDI represent 4 + bytes. */ + sarl $2, %ecx +-# endif ++ /* Skip ECX bytes. */ ++ shrl %cl, %edi ++# else + /* Skip ECX bytes. */ + shrq %cl, %rdi ++# endif + 1: + /* Before jumping back to the loop, set ESI to the number of + VEC_SIZE * 4 blocks before page crossing. */ +@@ -818,7 +863,7 @@ L(cross_page_loop): + movzbl (%rdi, %rdx), %eax + movzbl (%rsi, %rdx), %ecx + # endif +- /* Check null char. */ ++ /* Check null CHAR. */ + testl %eax, %eax + jne L(cross_page_loop) + /* Since %eax == 0, subtract is OK for both SIGNED and UNSIGNED +@@ -901,18 +946,17 @@ L(cross_page): + jg L(cross_page_1_vector) + L(loop_1_vector): + VMOVU (%rdi, %rdx), %YMM0 +- VMOVU (%rsi, %rdx), %YMM1 +- +- /* Each bit in K0 represents a mismatch in YMM0 and YMM1. */ +- VPCMP $4, %YMM0, %YMM1, %k0 +- VPCMP $0, %YMMZERO, %YMM0, %k1 +- VPCMP $0, %YMMZERO, %YMM1, %k2 +- /* Each bit in K1 represents a NULL in YMM0 or YMM1. */ +- kord %k1, %k2, %k1 +- /* Each bit in K1 represents a NULL or a mismatch. */ +- kord %k0, %k1, %k1 ++ ++ VPTESTM %YMM0, %YMM0, %k2 ++ /* Each bit cleared in K1 represents a mismatch or a null CHAR ++ in YMM0 and 32 bytes at (%rsi, %rdx). */ ++ VPCMP $0, (%rsi, %rdx), %YMM0, %k1{%k2} + kmovd %k1, %ecx +- testl %ecx, %ecx ++# ifdef USE_AS_WCSCMP ++ subl $0xff, %ecx ++# else ++ incl %ecx ++# endif + jne L(last_vector) + + addl $VEC_SIZE, %edx +@@ -931,18 +975,17 @@ L(cross_page_1_vector): + cmpl $(PAGE_SIZE - 16), %eax + jg L(cross_page_1_xmm) + VMOVU (%rdi, %rdx), %XMM0 +- VMOVU (%rsi, %rdx), %XMM1 +- +- /* Each bit in K0 represents a mismatch in XMM0 and XMM1. */ +- VPCMP $4, %XMM0, %XMM1, %k0 +- VPCMP $0, %XMMZERO, %XMM0, %k1 +- VPCMP $0, %XMMZERO, %XMM1, %k2 +- /* Each bit in K1 represents a NULL in XMM0 or XMM1. */ +- korw %k1, %k2, %k1 +- /* Each bit in K1 represents a NULL or a mismatch. */ +- korw %k0, %k1, %k1 +- kmovw %k1, %ecx +- testl %ecx, %ecx ++ ++ VPTESTM %YMM0, %YMM0, %k2 ++ /* Each bit cleared in K1 represents a mismatch or a null CHAR ++ in XMM0 and 16 bytes at (%rsi, %rdx). */ ++ VPCMP $0, (%rsi, %rdx), %XMM0, %k1{%k2} ++ kmovd %k1, %ecx ++# ifdef USE_AS_WCSCMP ++ subl $0xf, %ecx ++# else ++ subl $0xffff, %ecx ++# endif + jne L(last_vector) + + addl $16, %edx +@@ -965,25 +1008,16 @@ L(cross_page_1_xmm): + vmovq (%rdi, %rdx), %XMM0 + vmovq (%rsi, %rdx), %XMM1 + +- /* Each bit in K0 represents a mismatch in XMM0 and XMM1. */ +- VPCMP $4, %XMM0, %XMM1, %k0 +- VPCMP $0, %XMMZERO, %XMM0, %k1 +- VPCMP $0, %XMMZERO, %XMM1, %k2 +- /* Each bit in K1 represents a NULL in XMM0 or XMM1. */ +- kord %k1, %k2, %k1 +- /* Each bit in K1 represents a NULL or a mismatch. 
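The same GPR folding is used when the loop crosses the second vector pair; for wcscmp the skip count still arrives in bytes, so it is divided by 4 (SARL $2) before the shift, and a 32-bit SHRL suffices because the combined wcscmp mask has at most 16 valid bits. A small C sketch with illustrative names:

#include <stdint.h>

/* Sketch: mask bits represent CHARs while byte_count counts bytes,
   so the wide-character build scales the shift down by
   sizeof (wchar_t).  */
static uint64_t
skip_compared_chars (uint64_t mask, unsigned byte_count, int wide_chars)
{
  unsigned shift = wide_chars ? (byte_count >> 2) : byte_count; /* sarl $2 */
  return mask >> shift;
}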
*/ +- kord %k0, %k1, %k1 +- kmovd %k1, %ecx +- ++ VPTESTM %YMM0, %YMM0, %k2 ++ /* Each bit cleared in K1 represents a mismatch or a null CHAR ++ in XMM0 and XMM1. */ ++ VPCMP $0, %XMM1, %XMM0, %k1{%k2} ++ kmovb %k1, %ecx + # ifdef USE_AS_WCSCMP +- /* Only last 2 bits are valid. */ +- andl $0x3, %ecx ++ subl $0x3, %ecx + # else +- /* Only last 8 bits are valid. */ +- andl $0xff, %ecx ++ subl $0xff, %ecx + # endif +- +- testl %ecx, %ecx + jne L(last_vector) + + addl $8, %edx +@@ -1002,25 +1036,16 @@ L(cross_page_8bytes): + vmovd (%rdi, %rdx), %XMM0 + vmovd (%rsi, %rdx), %XMM1 + +- /* Each bit in K0 represents a mismatch in XMM0 and XMM1. */ +- VPCMP $4, %XMM0, %XMM1, %k0 +- VPCMP $0, %XMMZERO, %XMM0, %k1 +- VPCMP $0, %XMMZERO, %XMM1, %k2 +- /* Each bit in K1 represents a NULL in XMM0 or XMM1. */ +- kord %k1, %k2, %k1 +- /* Each bit in K1 represents a NULL or a mismatch. */ +- kord %k0, %k1, %k1 ++ VPTESTM %YMM0, %YMM0, %k2 ++ /* Each bit cleared in K1 represents a mismatch or a null CHAR ++ in XMM0 and XMM1. */ ++ VPCMP $0, %XMM1, %XMM0, %k1{%k2} + kmovd %k1, %ecx +- + # ifdef USE_AS_WCSCMP +- /* Only the last bit is valid. */ +- andl $0x1, %ecx ++ subl $0x1, %ecx + # else +- /* Only last 4 bits are valid. */ +- andl $0xf, %ecx ++ subl $0xf, %ecx + # endif +- +- testl %ecx, %ecx + jne L(last_vector) + + addl $4, %edx diff --git a/SOURCES/glibc-upstream-2.34-179.patch b/SOURCES/glibc-upstream-2.34-179.patch new file mode 100644 index 0000000..e9a4329 --- /dev/null +++ b/SOURCES/glibc-upstream-2.34-179.patch @@ -0,0 +1,85 @@ +commit a182bb7a3922404f79def09d79ef89678b4049f0 +Author: H.J. Lu +Date: Fri Oct 29 12:56:53 2021 -0700 + + x86-64: Remove Prefer_AVX2_STRCMP + + Remove Prefer_AVX2_STRCMP to enable EVEX strcmp. When comparing 2 32-byte + strings, EVEX strcmp has been improved to require 1 load, 1 VPTESTM, 1 + VPCMP, 1 KMOVD and 1 INCL instead of 2 loads, 3 VPCMPs, 2 KORDs, 1 KMOVD + and 1 TESTL while AVX2 strcmp requires 1 load, 2 VPCMPEQs, 1 VPMINU, 1 + VPMOVMSKB and 1 TESTL. EVEX strcmp is now faster than AVX2 strcmp by up + to 40% on Tiger Lake and Ice Lake. + + (cherry picked from commit 14dbbf46a007ae5df36646b51ad0c9e5f5259f30) + +diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c +index de4e3c3b7258120d..f4d4049e391cbabd 100644 +--- a/sysdeps/x86/cpu-features.c ++++ b/sysdeps/x86/cpu-features.c +@@ -574,14 +574,6 @@ disable_tsx: + if (CPU_FEATURE_USABLE_P (cpu_features, RTM)) + cpu_features->preferred[index_arch_Prefer_No_VZEROUPPER] + |= bit_arch_Prefer_No_VZEROUPPER; +- +- /* Since to compare 2 32-byte strings, 256-bit EVEX strcmp +- requires 2 loads, 3 VPCMPs and 2 KORDs while AVX2 strcmp +- requires 1 load, 2 VPCMPEQs, 1 VPMINU and 1 VPMOVMSKB, +- AVX2 strcmp is faster than EVEX strcmp. */ +- if (CPU_FEATURE_USABLE_P (cpu_features, AVX2)) +- cpu_features->preferred[index_arch_Prefer_AVX2_STRCMP] +- |= bit_arch_Prefer_AVX2_STRCMP; + } + + /* Avoid avoid short distance REP MOVSB on processor with FSRM. 
*/ +diff --git a/sysdeps/x86/cpu-tunables.c b/sysdeps/x86/cpu-tunables.c +index 58f2fad4323d5d91..957db3ad229ba39f 100644 +--- a/sysdeps/x86/cpu-tunables.c ++++ b/sysdeps/x86/cpu-tunables.c +@@ -239,8 +239,6 @@ TUNABLE_CALLBACK (set_hwcaps) (tunable_val_t *valp) + CHECK_GLIBC_IFUNC_PREFERRED_BOTH (n, cpu_features, + Fast_Copy_Backward, + disable, 18); +- CHECK_GLIBC_IFUNC_PREFERRED_NEED_BOTH +- (n, cpu_features, Prefer_AVX2_STRCMP, AVX2, disable, 18); + } + break; + case 19: +diff --git a/sysdeps/x86/include/cpu-features-preferred_feature_index_1.def b/sysdeps/x86/include/cpu-features-preferred_feature_index_1.def +index 3bdc76cf71007948..8250bfcbecd29a9f 100644 +--- a/sysdeps/x86/include/cpu-features-preferred_feature_index_1.def ++++ b/sysdeps/x86/include/cpu-features-preferred_feature_index_1.def +@@ -31,5 +31,4 @@ BIT (Prefer_ERMS) + BIT (Prefer_No_AVX512) + BIT (MathVec_Prefer_No_AVX512) + BIT (Prefer_FSRM) +-BIT (Prefer_AVX2_STRCMP) + BIT (Avoid_Short_Distance_REP_MOVSB) +diff --git a/sysdeps/x86_64/multiarch/strcmp.c b/sysdeps/x86_64/multiarch/strcmp.c +index 62b7abeeee646ab4..7c2901bf44456259 100644 +--- a/sysdeps/x86_64/multiarch/strcmp.c ++++ b/sysdeps/x86_64/multiarch/strcmp.c +@@ -43,8 +43,7 @@ IFUNC_SELECTOR (void) + { + if (CPU_FEATURE_USABLE_P (cpu_features, AVX512VL) + && CPU_FEATURE_USABLE_P (cpu_features, AVX512BW) +- && CPU_FEATURE_USABLE_P (cpu_features, BMI2) +- && !CPU_FEATURES_ARCH_P (cpu_features, Prefer_AVX2_STRCMP)) ++ && CPU_FEATURE_USABLE_P (cpu_features, BMI2)) + return OPTIMIZE (evex); + + if (CPU_FEATURE_USABLE_P (cpu_features, RTM)) +diff --git a/sysdeps/x86_64/multiarch/strncmp.c b/sysdeps/x86_64/multiarch/strncmp.c +index 60ba0fe356b31779..f94a421784bfe923 100644 +--- a/sysdeps/x86_64/multiarch/strncmp.c ++++ b/sysdeps/x86_64/multiarch/strncmp.c +@@ -43,8 +43,7 @@ IFUNC_SELECTOR (void) + { + if (CPU_FEATURE_USABLE_P (cpu_features, AVX512VL) + && CPU_FEATURE_USABLE_P (cpu_features, AVX512BW) +- && CPU_FEATURE_USABLE_P (cpu_features, BMI2) +- && !CPU_FEATURES_ARCH_P (cpu_features, Prefer_AVX2_STRCMP)) ++ && CPU_FEATURE_USABLE_P (cpu_features, BMI2)) + return OPTIMIZE (evex); + + if (CPU_FEATURE_USABLE_P (cpu_features, RTM)) diff --git a/SOURCES/glibc-upstream-2.34-180.patch b/SOURCES/glibc-upstream-2.34-180.patch new file mode 100644 index 0000000..9707cf2 --- /dev/null +++ b/SOURCES/glibc-upstream-2.34-180.patch @@ -0,0 +1,48 @@ +commit 2e64237a8744dd50f9222293275fa52e7248ff76 +Author: Fangrui Song +Date: Tue Nov 2 20:59:52 2021 -0700 + + x86-64: Replace movzx with movzbl + + Clang cannot assemble movzx in the AT&T dialect mode. + + ../sysdeps/x86_64/strcmp.S:2232:16: error: invalid operand for instruction + movzx (%rsi), %ecx + ^~~~ + + Change movzx to movzbl, which follows the AT&T dialect and is used + elsewhere in the file. + + Reviewed-by: H.J. 
Lu + (cherry picked from commit 6720d36b6623c5e48c070d86acf61198b33e144e) + +diff --git a/sysdeps/x86_64/multiarch/strcmp-sse42.S b/sysdeps/x86_64/multiarch/strcmp-sse42.S +index bc19547b09639071..6197a723b9e0606e 100644 +--- a/sysdeps/x86_64/multiarch/strcmp-sse42.S ++++ b/sysdeps/x86_64/multiarch/strcmp-sse42.S +@@ -1771,8 +1771,8 @@ LABEL(strcmp_exitz): + .p2align 4 + // XXX Same as code above + LABEL(Byte0): +- movzx (%rsi), %ecx +- movzx (%rdi), %eax ++ movzbl (%rsi), %ecx ++ movzbl (%rdi), %eax + + #if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L + leaq _nl_C_LC_CTYPE_tolower+128*4(%rip), %rdx +diff --git a/sysdeps/x86_64/strcmp.S b/sysdeps/x86_64/strcmp.S +index 824e648230a15739..7f8a1bc756f86aee 100644 +--- a/sysdeps/x86_64/strcmp.S ++++ b/sysdeps/x86_64/strcmp.S +@@ -2232,8 +2232,8 @@ LABEL(strcmp_exitz): + + .p2align 4 + LABEL(Byte0): +- movzx (%rsi), %ecx +- movzx (%rdi), %eax ++ movzbl (%rsi), %ecx ++ movzbl (%rdi), %eax + + #if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L + leaq _nl_C_LC_CTYPE_tolower+128*4(%rip), %rdx diff --git a/SOURCES/glibc-upstream-2.34-181.patch b/SOURCES/glibc-upstream-2.34-181.patch new file mode 100644 index 0000000..36a401f --- /dev/null +++ b/SOURCES/glibc-upstream-2.34-181.patch @@ -0,0 +1,843 @@ +commit a7392db2ff2b9dd906500941ac6361dbe2211b0d +Author: Noah Goldstein +Date: Mon Nov 1 00:49:51 2021 -0500 + + x86: Optimize memmove-vec-unaligned-erms.S + + No bug. + + The optimizations are as follows: + + 1) Always align entry to 64 bytes. This makes behavior more + predictable and makes other frontend optimizations easier. + + 2) Make the L(more_8x_vec) cases 4k aliasing aware. This can have + significant benefits in the case that: + 0 < (dst - src) < [256, 512] + + 3) Align before `rep movsb`. For ERMS this is roughly a [0, 30%] + improvement and for FSRM [-10%, 25%]. + + In addition to these primary changes there is general cleanup + throughout to optimize the aligning routines and control flow logic. + + Signed-off-by: Noah Goldstein + Reviewed-by: H.J. Lu + (cherry picked from commit a6b7502ec0c2da89a7437f43171f160d713e39c6) + +diff --git a/sysdeps/x86_64/memmove.S b/sysdeps/x86_64/memmove.S +index db106a7a1f23f268..b2b318084823dceb 100644 +--- a/sysdeps/x86_64/memmove.S ++++ b/sysdeps/x86_64/memmove.S +@@ -25,7 +25,7 @@ + /* Use movups and movaps for smaller code sizes. 
*/ + #define VMOVU movups + #define VMOVA movaps +- ++#define MOV_SIZE 3 + #define SECTION(p) p + + #ifdef USE_MULTIARCH +diff --git a/sysdeps/x86_64/multiarch/memmove-avx-unaligned-erms-rtm.S b/sysdeps/x86_64/multiarch/memmove-avx-unaligned-erms-rtm.S +index 1ec1962e861dbf63..67a55f0c85af841c 100644 +--- a/sysdeps/x86_64/multiarch/memmove-avx-unaligned-erms-rtm.S ++++ b/sysdeps/x86_64/multiarch/memmove-avx-unaligned-erms-rtm.S +@@ -4,7 +4,7 @@ + # define VMOVNT vmovntdq + # define VMOVU vmovdqu + # define VMOVA vmovdqa +- ++# define MOV_SIZE 4 + # define ZERO_UPPER_VEC_REGISTERS_RETURN \ + ZERO_UPPER_VEC_REGISTERS_RETURN_XTEST + +diff --git a/sysdeps/x86_64/multiarch/memmove-avx-unaligned-erms.S b/sysdeps/x86_64/multiarch/memmove-avx-unaligned-erms.S +index e195e93f153c9512..975ae6c0515b83cb 100644 +--- a/sysdeps/x86_64/multiarch/memmove-avx-unaligned-erms.S ++++ b/sysdeps/x86_64/multiarch/memmove-avx-unaligned-erms.S +@@ -4,7 +4,7 @@ + # define VMOVNT vmovntdq + # define VMOVU vmovdqu + # define VMOVA vmovdqa +- ++# define MOV_SIZE 4 + # define SECTION(p) p##.avx + # define MEMMOVE_SYMBOL(p,s) p##_avx_##s + +diff --git a/sysdeps/x86_64/multiarch/memmove-avx512-unaligned-erms.S b/sysdeps/x86_64/multiarch/memmove-avx512-unaligned-erms.S +index 848848ab39ff9326..0fa7126830af7acb 100644 +--- a/sysdeps/x86_64/multiarch/memmove-avx512-unaligned-erms.S ++++ b/sysdeps/x86_64/multiarch/memmove-avx512-unaligned-erms.S +@@ -25,7 +25,7 @@ + # define VMOVU vmovdqu64 + # define VMOVA vmovdqa64 + # define VZEROUPPER +- ++# define MOV_SIZE 6 + # define SECTION(p) p##.evex512 + # define MEMMOVE_SYMBOL(p,s) p##_avx512_##s + +diff --git a/sysdeps/x86_64/multiarch/memmove-evex-unaligned-erms.S b/sysdeps/x86_64/multiarch/memmove-evex-unaligned-erms.S +index 0cbce8f944da51a0..88715441feaaccf5 100644 +--- a/sysdeps/x86_64/multiarch/memmove-evex-unaligned-erms.S ++++ b/sysdeps/x86_64/multiarch/memmove-evex-unaligned-erms.S +@@ -25,7 +25,7 @@ + # define VMOVU vmovdqu64 + # define VMOVA vmovdqa64 + # define VZEROUPPER +- ++# define MOV_SIZE 6 + # define SECTION(p) p##.evex + # define MEMMOVE_SYMBOL(p,s) p##_evex_##s + +diff --git a/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S b/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S +index abde8438d41f2320..7b27cbdda5fb99f7 100644 +--- a/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S ++++ b/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S +@@ -76,6 +76,25 @@ + # endif + #endif + ++/* Whether to align before movsb. Ultimately we want 64 byte ++ align and not worth it to load 4x VEC for VEC_SIZE == 16. */ ++#define ALIGN_MOVSB (VEC_SIZE > 16) ++/* Number of bytes to align movsb to. */ ++#define MOVSB_ALIGN_TO 64 ++ ++#define SMALL_MOV_SIZE (MOV_SIZE <= 4) ++#define LARGE_MOV_SIZE (MOV_SIZE > 4) ++ ++#if SMALL_MOV_SIZE + LARGE_MOV_SIZE != 1 ++# error MOV_SIZE Unknown ++#endif ++ ++#if LARGE_MOV_SIZE ++# define SMALL_SIZE_OFFSET (4) ++#else ++# define SMALL_SIZE_OFFSET (0) ++#endif ++ + #ifndef PAGE_SIZE + # define PAGE_SIZE 4096 + #endif +@@ -199,25 +218,21 @@ L(start): + # endif + cmp $VEC_SIZE, %RDX_LP + jb L(less_vec) ++ /* Load regardless. */ ++ VMOVU (%rsi), %VEC(0) + cmp $(VEC_SIZE * 2), %RDX_LP + ja L(more_2x_vec) +-#if !defined USE_MULTIARCH || !IS_IN (libc) +-L(last_2x_vec): +-#endif + /* From VEC and to 2 * VEC. No branch when size == VEC_SIZE. 
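The reworked entry loads the first vector unconditionally and then handles every size in [VEC_SIZE, 2 * VEC_SIZE] with one head move and one overlapping tail move, both loaded before either store so the sequence stays correct for overlapping buffers. A C sketch of the pattern, assuming a 32-byte VEC; names are illustrative:

#include <string.h>
#include <stddef.h>

#define VEC 32  /* stand-in for a YMM register */

/* Sketch of the branchless head + tail copy for VEC <= n <= 2 * VEC.  */
static void
copy_vec_to_2x_vec (unsigned char *dst, const unsigned char *src, size_t n)
{
  unsigned char head[VEC], tail[VEC];
  memcpy (head, src, VEC);            /* VMOVU (%rsi), %VEC(0)          */
  memcpy (tail, src + n - VEC, VEC);  /* VMOVU -VEC_SIZE(%rsi,%rdx)     */
  memcpy (dst, head, VEC);            /* VMOVU %VEC(0), (%rdi)          */
  memcpy (dst + n - VEC, tail, VEC);  /* VMOVU %VEC(1), -VEC(%rdi,%rdx) */
}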
*/ +- VMOVU (%rsi), %VEC(0) + VMOVU -VEC_SIZE(%rsi,%rdx), %VEC(1) + VMOVU %VEC(0), (%rdi) + VMOVU %VEC(1), -VEC_SIZE(%rdi,%rdx) +-#if !defined USE_MULTIARCH || !IS_IN (libc) +-L(nop): +- ret ++#if !(defined USE_MULTIARCH && IS_IN (libc)) ++ ZERO_UPPER_VEC_REGISTERS_RETURN + #else + VZEROUPPER_RETURN + #endif + #if defined USE_MULTIARCH && IS_IN (libc) + END (MEMMOVE_SYMBOL (__memmove, unaligned)) +- + # if VEC_SIZE == 16 + ENTRY (__mempcpy_chk_erms) + cmp %RDX_LP, %RCX_LP +@@ -289,7 +304,7 @@ ENTRY (MEMMOVE_CHK_SYMBOL (__memmove_chk, unaligned_erms)) + END (MEMMOVE_CHK_SYMBOL (__memmove_chk, unaligned_erms)) + # endif + +-ENTRY (MEMMOVE_SYMBOL (__memmove, unaligned_erms)) ++ENTRY_P2ALIGN (MEMMOVE_SYMBOL (__memmove, unaligned_erms), 6) + movq %rdi, %rax + L(start_erms): + # ifdef __ILP32__ +@@ -298,310 +313,448 @@ L(start_erms): + # endif + cmp $VEC_SIZE, %RDX_LP + jb L(less_vec) ++ /* Load regardless. */ ++ VMOVU (%rsi), %VEC(0) + cmp $(VEC_SIZE * 2), %RDX_LP + ja L(movsb_more_2x_vec) +-L(last_2x_vec): +- /* From VEC and to 2 * VEC. No branch when size == VEC_SIZE. */ +- VMOVU (%rsi), %VEC(0) +- VMOVU -VEC_SIZE(%rsi,%rdx), %VEC(1) ++ /* From VEC and to 2 * VEC. No branch when size == VEC_SIZE. ++ */ ++ VMOVU -VEC_SIZE(%rsi, %rdx), %VEC(1) + VMOVU %VEC(0), (%rdi) +- VMOVU %VEC(1), -VEC_SIZE(%rdi,%rdx) ++ VMOVU %VEC(1), -VEC_SIZE(%rdi, %rdx) + L(return): +-#if VEC_SIZE > 16 ++# if VEC_SIZE > 16 + ZERO_UPPER_VEC_REGISTERS_RETURN +-#else ++# else + ret ++# endif + #endif + +-L(movsb): +- cmp __x86_rep_movsb_stop_threshold(%rip), %RDX_LP +- jae L(more_8x_vec) +- cmpq %rsi, %rdi +- jb 1f +- /* Source == destination is less common. */ +- je L(nop) +- leaq (%rsi,%rdx), %r9 +- cmpq %r9, %rdi +- /* Avoid slow backward REP MOVSB. */ +- jb L(more_8x_vec_backward) +-# if AVOID_SHORT_DISTANCE_REP_MOVSB +- testl $X86_STRING_CONTROL_AVOID_SHORT_DISTANCE_REP_MOVSB, __x86_string_control(%rip) +- jz 3f +- movq %rdi, %rcx +- subq %rsi, %rcx +- jmp 2f +-# endif +-1: +-# if AVOID_SHORT_DISTANCE_REP_MOVSB +- testl $X86_STRING_CONTROL_AVOID_SHORT_DISTANCE_REP_MOVSB, __x86_string_control(%rip) +- jz 3f +- movq %rsi, %rcx +- subq %rdi, %rcx +-2: +-/* Avoid "rep movsb" if RCX, the distance between source and destination, +- is N*4GB + [1..63] with N >= 0. */ +- cmpl $63, %ecx +- jbe L(more_2x_vec) /* Avoid "rep movsb" if ECX <= 63. */ +-3: +-# endif +- mov %RDX_LP, %RCX_LP +- rep movsb +-L(nop): ++#if LARGE_MOV_SIZE ++ /* If LARGE_MOV_SIZE this fits in the aligning bytes between the ++ ENTRY block and L(less_vec). */ ++ .p2align 4,, 8 ++L(between_4_7): ++ /* From 4 to 7. No branch when size == 4. */ ++ movl (%rsi), %ecx ++ movl (%rsi, %rdx), %esi ++ movl %ecx, (%rdi) ++ movl %esi, (%rdi, %rdx) + ret + #endif + ++ .p2align 4 + L(less_vec): + /* Less than 1 VEC. */ + #if VEC_SIZE != 16 && VEC_SIZE != 32 && VEC_SIZE != 64 + # error Unsupported VEC_SIZE! 
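The sub-4-byte tail is equally branch-light: one byte plus a possibly overlapping 2-byte move at the end cover n == 2 and n == 3, n == 1 stores just the byte, and n == 0 falls straight through. A C sketch of L(copy_0)/L(copy_1); names are illustrative:

#include <string.h>
#include <stdint.h>
#include <stddef.h>

/* Sketch: all loads happen before any store, so the routine is safe
   for overlapping src/dst.  */
static void
copy_0_to_3 (unsigned char *dst, const unsigned char *src, size_t n)
{
  if (n == 0)
    return;                          /* jl L(copy_0) */
  unsigned char c = src[0];          /* movb (%rsi), %cl */
  if (n > 1)
    {
      uint16_t w;
      memcpy (&w, src + n - 2, 2);   /* movzwl at the tail */
      memcpy (dst + n - 2, &w, 2);   /* movw at the tail   */
    }
  dst[0] = c;                        /* movb %cl, (%rdi) */
}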
+ #endif + #if VEC_SIZE > 32 +- cmpb $32, %dl ++ cmpl $32, %edx + jae L(between_32_63) + #endif + #if VEC_SIZE > 16 +- cmpb $16, %dl ++ cmpl $16, %edx + jae L(between_16_31) + #endif +- cmpb $8, %dl ++ cmpl $8, %edx + jae L(between_8_15) +- cmpb $4, %dl ++#if SMALL_MOV_SIZE ++ cmpl $4, %edx ++#else ++ subq $4, %rdx ++#endif + jae L(between_4_7) +- cmpb $1, %dl +- ja L(between_2_3) +- jb 1f +- movzbl (%rsi), %ecx ++ cmpl $(1 - SMALL_SIZE_OFFSET), %edx ++ jl L(copy_0) ++ movb (%rsi), %cl ++ je L(copy_1) ++ movzwl (-2 + SMALL_SIZE_OFFSET)(%rsi, %rdx), %esi ++ movw %si, (-2 + SMALL_SIZE_OFFSET)(%rdi, %rdx) ++L(copy_1): + movb %cl, (%rdi) +-1: ++L(copy_0): + ret ++ ++#if SMALL_MOV_SIZE ++ .p2align 4,, 8 ++L(between_4_7): ++ /* From 4 to 7. No branch when size == 4. */ ++ movl -4(%rsi, %rdx), %ecx ++ movl (%rsi), %esi ++ movl %ecx, -4(%rdi, %rdx) ++ movl %esi, (%rdi) ++ ret ++#endif ++ ++#if VEC_SIZE > 16 ++ /* From 16 to 31. No branch when size == 16. */ ++ .p2align 4,, 8 ++L(between_16_31): ++ vmovdqu (%rsi), %xmm0 ++ vmovdqu -16(%rsi, %rdx), %xmm1 ++ vmovdqu %xmm0, (%rdi) ++ vmovdqu %xmm1, -16(%rdi, %rdx) ++ /* No ymm registers have been touched. */ ++ ret ++#endif ++ + #if VEC_SIZE > 32 ++ .p2align 4,, 10 + L(between_32_63): + /* From 32 to 63. No branch when size == 32. */ + VMOVU (%rsi), %YMM0 +- VMOVU -32(%rsi,%rdx), %YMM1 ++ VMOVU -32(%rsi, %rdx), %YMM1 + VMOVU %YMM0, (%rdi) +- VMOVU %YMM1, -32(%rdi,%rdx) +- VZEROUPPER_RETURN +-#endif +-#if VEC_SIZE > 16 +- /* From 16 to 31. No branch when size == 16. */ +-L(between_16_31): +- VMOVU (%rsi), %XMM0 +- VMOVU -16(%rsi,%rdx), %XMM1 +- VMOVU %XMM0, (%rdi) +- VMOVU %XMM1, -16(%rdi,%rdx) ++ VMOVU %YMM1, -32(%rdi, %rdx) + VZEROUPPER_RETURN + #endif ++ ++ .p2align 4,, 10 + L(between_8_15): + /* From 8 to 15. No branch when size == 8. */ +- movq -8(%rsi,%rdx), %rcx ++ movq -8(%rsi, %rdx), %rcx + movq (%rsi), %rsi +- movq %rcx, -8(%rdi,%rdx) + movq %rsi, (%rdi) ++ movq %rcx, -8(%rdi, %rdx) + ret +-L(between_4_7): +- /* From 4 to 7. No branch when size == 4. */ +- movl -4(%rsi,%rdx), %ecx +- movl (%rsi), %esi +- movl %ecx, -4(%rdi,%rdx) +- movl %esi, (%rdi) +- ret +-L(between_2_3): +- /* From 2 to 3. No branch when size == 2. */ +- movzwl -2(%rsi,%rdx), %ecx +- movzwl (%rsi), %esi +- movw %cx, -2(%rdi,%rdx) +- movw %si, (%rdi) +- ret + ++ .p2align 4,, 10 ++L(last_4x_vec): ++ /* Copy from 2 * VEC + 1 to 4 * VEC, inclusively. */ ++ ++ /* VEC(0) and VEC(1) have already been loaded. */ ++ VMOVU -VEC_SIZE(%rsi, %rdx), %VEC(2) ++ VMOVU -(VEC_SIZE * 2)(%rsi, %rdx), %VEC(3) ++ VMOVU %VEC(0), (%rdi) ++ VMOVU %VEC(1), VEC_SIZE(%rdi) ++ VMOVU %VEC(2), -VEC_SIZE(%rdi, %rdx) ++ VMOVU %VEC(3), -(VEC_SIZE * 2)(%rdi, %rdx) ++ VZEROUPPER_RETURN ++ ++ .p2align 4 + #if defined USE_MULTIARCH && IS_IN (libc) + L(movsb_more_2x_vec): + cmp __x86_rep_movsb_threshold(%rip), %RDX_LP + ja L(movsb) + #endif + L(more_2x_vec): +- /* More than 2 * VEC and there may be overlap between destination +- and source. */ ++ /* More than 2 * VEC and there may be overlap between ++ destination and source. */ + cmpq $(VEC_SIZE * 8), %rdx + ja L(more_8x_vec) ++ /* Load VEC(1) regardless. VEC(0) has already been loaded. */ ++ VMOVU VEC_SIZE(%rsi), %VEC(1) + cmpq $(VEC_SIZE * 4), %rdx + jbe L(last_4x_vec) +- /* Copy from 4 * VEC + 1 to 8 * VEC, inclusively. */ +- VMOVU (%rsi), %VEC(0) +- VMOVU VEC_SIZE(%rsi), %VEC(1) ++ /* Copy from 4 * VEC + 1 to 8 * VEC, inclusively. 
*/ + VMOVU (VEC_SIZE * 2)(%rsi), %VEC(2) + VMOVU (VEC_SIZE * 3)(%rsi), %VEC(3) +- VMOVU -VEC_SIZE(%rsi,%rdx), %VEC(4) +- VMOVU -(VEC_SIZE * 2)(%rsi,%rdx), %VEC(5) +- VMOVU -(VEC_SIZE * 3)(%rsi,%rdx), %VEC(6) +- VMOVU -(VEC_SIZE * 4)(%rsi,%rdx), %VEC(7) ++ VMOVU -VEC_SIZE(%rsi, %rdx), %VEC(4) ++ VMOVU -(VEC_SIZE * 2)(%rsi, %rdx), %VEC(5) ++ VMOVU -(VEC_SIZE * 3)(%rsi, %rdx), %VEC(6) ++ VMOVU -(VEC_SIZE * 4)(%rsi, %rdx), %VEC(7) + VMOVU %VEC(0), (%rdi) + VMOVU %VEC(1), VEC_SIZE(%rdi) + VMOVU %VEC(2), (VEC_SIZE * 2)(%rdi) + VMOVU %VEC(3), (VEC_SIZE * 3)(%rdi) +- VMOVU %VEC(4), -VEC_SIZE(%rdi,%rdx) +- VMOVU %VEC(5), -(VEC_SIZE * 2)(%rdi,%rdx) +- VMOVU %VEC(6), -(VEC_SIZE * 3)(%rdi,%rdx) +- VMOVU %VEC(7), -(VEC_SIZE * 4)(%rdi,%rdx) +- VZEROUPPER_RETURN +-L(last_4x_vec): +- /* Copy from 2 * VEC + 1 to 4 * VEC, inclusively. */ +- VMOVU (%rsi), %VEC(0) +- VMOVU VEC_SIZE(%rsi), %VEC(1) +- VMOVU -VEC_SIZE(%rsi,%rdx), %VEC(2) +- VMOVU -(VEC_SIZE * 2)(%rsi,%rdx), %VEC(3) +- VMOVU %VEC(0), (%rdi) +- VMOVU %VEC(1), VEC_SIZE(%rdi) +- VMOVU %VEC(2), -VEC_SIZE(%rdi,%rdx) +- VMOVU %VEC(3), -(VEC_SIZE * 2)(%rdi,%rdx) ++ VMOVU %VEC(4), -VEC_SIZE(%rdi, %rdx) ++ VMOVU %VEC(5), -(VEC_SIZE * 2)(%rdi, %rdx) ++ VMOVU %VEC(6), -(VEC_SIZE * 3)(%rdi, %rdx) ++ VMOVU %VEC(7), -(VEC_SIZE * 4)(%rdi, %rdx) + VZEROUPPER_RETURN + ++ .p2align 4,, 4 + L(more_8x_vec): ++ movq %rdi, %rcx ++ subq %rsi, %rcx ++ /* Go to backwards temporal copy if overlap no matter what as ++ backward REP MOVSB is slow and we don't want to use NT stores if ++ there is overlap. */ ++ cmpq %rdx, %rcx ++ /* L(more_8x_vec_backward_check_nop) checks for src == dst. */ ++ jb L(more_8x_vec_backward_check_nop) + /* Check if non-temporal move candidate. */ + #if (defined USE_MULTIARCH || VEC_SIZE == 16) && IS_IN (libc) + /* Check non-temporal store threshold. */ +- cmp __x86_shared_non_temporal_threshold(%rip), %RDX_LP ++ cmp __x86_shared_non_temporal_threshold(%rip), %RDX_LP + ja L(large_memcpy_2x) + #endif +- /* Entry if rdx is greater than non-temporal threshold but there +- is overlap. */ ++ /* To reach this point there cannot be overlap and dst > src. So ++ check for overlap and src > dst in which case correctness ++ requires forward copy. Otherwise decide between backward/forward ++ copy depending on address aliasing. */ ++ ++ /* Entry if rdx is greater than __x86_rep_movsb_stop_threshold ++ but less than __x86_shared_non_temporal_threshold. */ + L(more_8x_vec_check): +- cmpq %rsi, %rdi +- ja L(more_8x_vec_backward) +- /* Source == destination is less common. */ +- je L(nop) +- /* Load the first VEC and last 4 * VEC to support overlapping +- addresses. */ +- VMOVU (%rsi), %VEC(4) ++ /* rcx contains dst - src. Add back length (rdx). */ ++ leaq (%rcx, %rdx), %r8 ++ /* If r8 has different sign than rcx then there is overlap so we ++ must do forward copy. */ ++ xorq %rcx, %r8 ++ /* Isolate just sign bit of r8. */ ++ shrq $63, %r8 ++ /* Get 4k difference dst - src. */ ++ andl $(PAGE_SIZE - 256), %ecx ++ /* If r8 is non-zero must do foward for correctness. Otherwise ++ if ecx is non-zero there is 4k False Alaising so do backward ++ copy. */ ++ addl %r8d, %ecx ++ jz L(more_8x_vec_backward) ++ ++ /* if rdx is greater than __x86_shared_non_temporal_threshold ++ but there is overlap, or from short distance movsb. */ ++L(more_8x_vec_forward): ++ /* Load first and last 4 * VEC to support overlapping addresses. ++ */ ++ ++ /* First vec was already loaded into VEC(0). 
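By this point the case of dst inside (src, src + len) has already been dispatched to the backward path, so what remains is either a mandatory forward copy (src inside the destination range) or a free choice, where backward is taken exactly when dst - src lands in the first 256 bytes of a 4 KiB slice, the region where forward stores would false-alias the upcoming loads. A C sketch of the decision, assuming 4 KiB pages and 64-bit pointers; names are illustrative:

#include <stdint.h>
#include <stddef.h>

#define PAGE_SIZE 4096

/* Sketch of the jz L(more_8x_vec_backward) condition above; diff
   models the dst - src value computed into rcx.  */
static int
use_backward_copy (uintptr_t dst, uintptr_t src, size_t len)
{
  intptr_t diff = (intptr_t) (dst - src);
  /* Signs of diff and diff + len differ iff src sits inside the
     destination range: forward copy is then mandatory.  */
  int must_forward =
    (int) ((uintptr_t) (diff ^ (diff + (intptr_t) len)) >> 63);
  int same_4k_slice = ((unsigned) diff & (PAGE_SIZE - 256)) == 0;
  return !must_forward && same_4k_slice;
}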
*/ + VMOVU -VEC_SIZE(%rsi, %rdx), %VEC(5) + VMOVU -(VEC_SIZE * 2)(%rsi, %rdx), %VEC(6) ++ /* Save begining of dst. */ ++ movq %rdi, %rcx ++ /* Align dst to VEC_SIZE - 1. */ ++ orq $(VEC_SIZE - 1), %rdi + VMOVU -(VEC_SIZE * 3)(%rsi, %rdx), %VEC(7) + VMOVU -(VEC_SIZE * 4)(%rsi, %rdx), %VEC(8) +- /* Save start and stop of the destination buffer. */ +- movq %rdi, %r11 +- leaq -VEC_SIZE(%rdi, %rdx), %rcx +- /* Align destination for aligned stores in the loop. Compute +- how much destination is misaligned. */ +- movq %rdi, %r8 +- andq $(VEC_SIZE - 1), %r8 +- /* Get the negative of offset for alignment. */ +- subq $VEC_SIZE, %r8 +- /* Adjust source. */ +- subq %r8, %rsi +- /* Adjust destination which should be aligned now. */ +- subq %r8, %rdi +- /* Adjust length. */ +- addq %r8, %rdx + +- .p2align 4 ++ /* Subtract dst from src. Add back after dst aligned. */ ++ subq %rcx, %rsi ++ /* Finish aligning dst. */ ++ incq %rdi ++ /* Restore src adjusted with new value for aligned dst. */ ++ addq %rdi, %rsi ++ /* Store end of buffer minus tail in rdx. */ ++ leaq (VEC_SIZE * -4)(%rcx, %rdx), %rdx ++ ++ /* Dont use multi-byte nop to align. */ ++ .p2align 4,, 11 + L(loop_4x_vec_forward): + /* Copy 4 * VEC a time forward. */ +- VMOVU (%rsi), %VEC(0) +- VMOVU VEC_SIZE(%rsi), %VEC(1) +- VMOVU (VEC_SIZE * 2)(%rsi), %VEC(2) +- VMOVU (VEC_SIZE * 3)(%rsi), %VEC(3) ++ VMOVU (%rsi), %VEC(1) ++ VMOVU VEC_SIZE(%rsi), %VEC(2) ++ VMOVU (VEC_SIZE * 2)(%rsi), %VEC(3) ++ VMOVU (VEC_SIZE * 3)(%rsi), %VEC(4) + subq $-(VEC_SIZE * 4), %rsi +- addq $-(VEC_SIZE * 4), %rdx +- VMOVA %VEC(0), (%rdi) +- VMOVA %VEC(1), VEC_SIZE(%rdi) +- VMOVA %VEC(2), (VEC_SIZE * 2)(%rdi) +- VMOVA %VEC(3), (VEC_SIZE * 3)(%rdi) ++ VMOVA %VEC(1), (%rdi) ++ VMOVA %VEC(2), VEC_SIZE(%rdi) ++ VMOVA %VEC(3), (VEC_SIZE * 2)(%rdi) ++ VMOVA %VEC(4), (VEC_SIZE * 3)(%rdi) + subq $-(VEC_SIZE * 4), %rdi +- cmpq $(VEC_SIZE * 4), %rdx ++ cmpq %rdi, %rdx + ja L(loop_4x_vec_forward) + /* Store the last 4 * VEC. */ +- VMOVU %VEC(5), (%rcx) +- VMOVU %VEC(6), -VEC_SIZE(%rcx) +- VMOVU %VEC(7), -(VEC_SIZE * 2)(%rcx) +- VMOVU %VEC(8), -(VEC_SIZE * 3)(%rcx) ++ VMOVU %VEC(5), (VEC_SIZE * 3)(%rdx) ++ VMOVU %VEC(6), (VEC_SIZE * 2)(%rdx) ++ VMOVU %VEC(7), VEC_SIZE(%rdx) ++ VMOVU %VEC(8), (%rdx) + /* Store the first VEC. */ +- VMOVU %VEC(4), (%r11) ++ VMOVU %VEC(0), (%rcx) ++ /* Keep L(nop_backward) target close to jmp for 2-byte encoding. ++ */ ++L(nop_backward): + VZEROUPPER_RETURN + ++ .p2align 4,, 8 ++L(more_8x_vec_backward_check_nop): ++ /* rcx contains dst - src. Test for dst == src to skip all of ++ memmove. */ ++ testq %rcx, %rcx ++ jz L(nop_backward) + L(more_8x_vec_backward): + /* Load the first 4 * VEC and last VEC to support overlapping + addresses. */ +- VMOVU (%rsi), %VEC(4) ++ ++ /* First vec was also loaded into VEC(0). */ + VMOVU VEC_SIZE(%rsi), %VEC(5) + VMOVU (VEC_SIZE * 2)(%rsi), %VEC(6) ++ /* Begining of region for 4x backward copy stored in rcx. */ ++ leaq (VEC_SIZE * -4 + -1)(%rdi, %rdx), %rcx + VMOVU (VEC_SIZE * 3)(%rsi), %VEC(7) +- VMOVU -VEC_SIZE(%rsi,%rdx), %VEC(8) +- /* Save stop of the destination buffer. */ +- leaq -VEC_SIZE(%rdi, %rdx), %r11 +- /* Align destination end for aligned stores in the loop. Compute +- how much destination end is misaligned. */ +- leaq -VEC_SIZE(%rsi, %rdx), %rcx +- movq %r11, %r9 +- movq %r11, %r8 +- andq $(VEC_SIZE - 1), %r8 +- /* Adjust source. */ +- subq %r8, %rcx +- /* Adjust the end of destination which should be aligned now. */ +- subq %r8, %r9 +- /* Adjust length. 
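Aligning the destination for the forward loop takes two instructions: ORing dst with VEC_SIZE - 1 and incrementing rounds it up to the next vector boundary (always strictly forward, which is safe because the first vector at dst has already been copied), and src is carried as a delta so a single add rebases it. A C sketch with illustrative names:

#include <stdint.h>

/* Sketch of the orq/incq alignment plus the src rebasing around it.  */
static void
align_dst_forward (uintptr_t *dst, uintptr_t *src, uintptr_t vec_size)
{
  uintptr_t delta = *src - *dst;       /* subq %rcx, %rsi          */
  *dst = (*dst | (vec_size - 1)) + 1;  /* orq $(VEC_SIZE-1); incq  */
  *src = *dst + delta;                 /* addq %rdi, %rsi          */
}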
*/ +- subq %r8, %rdx +- +- .p2align 4 ++ VMOVU -VEC_SIZE(%rsi, %rdx), %VEC(8) ++ /* Subtract dst from src. Add back after dst aligned. */ ++ subq %rdi, %rsi ++ /* Align dst. */ ++ andq $-(VEC_SIZE), %rcx ++ /* Restore src. */ ++ addq %rcx, %rsi ++ ++ /* Don't use multi-byte nop to align. */ ++ .p2align 4,, 11 + L(loop_4x_vec_backward): + /* Copy 4 * VEC a time backward. */ +- VMOVU (%rcx), %VEC(0) +- VMOVU -VEC_SIZE(%rcx), %VEC(1) +- VMOVU -(VEC_SIZE * 2)(%rcx), %VEC(2) +- VMOVU -(VEC_SIZE * 3)(%rcx), %VEC(3) +- addq $-(VEC_SIZE * 4), %rcx +- addq $-(VEC_SIZE * 4), %rdx +- VMOVA %VEC(0), (%r9) +- VMOVA %VEC(1), -VEC_SIZE(%r9) +- VMOVA %VEC(2), -(VEC_SIZE * 2)(%r9) +- VMOVA %VEC(3), -(VEC_SIZE * 3)(%r9) +- addq $-(VEC_SIZE * 4), %r9 +- cmpq $(VEC_SIZE * 4), %rdx +- ja L(loop_4x_vec_backward) ++ VMOVU (VEC_SIZE * 3)(%rsi), %VEC(1) ++ VMOVU (VEC_SIZE * 2)(%rsi), %VEC(2) ++ VMOVU (VEC_SIZE * 1)(%rsi), %VEC(3) ++ VMOVU (VEC_SIZE * 0)(%rsi), %VEC(4) ++ addq $(VEC_SIZE * -4), %rsi ++ VMOVA %VEC(1), (VEC_SIZE * 3)(%rcx) ++ VMOVA %VEC(2), (VEC_SIZE * 2)(%rcx) ++ VMOVA %VEC(3), (VEC_SIZE * 1)(%rcx) ++ VMOVA %VEC(4), (VEC_SIZE * 0)(%rcx) ++ addq $(VEC_SIZE * -4), %rcx ++ cmpq %rcx, %rdi ++ jb L(loop_4x_vec_backward) + /* Store the first 4 * VEC. */ +- VMOVU %VEC(4), (%rdi) ++ VMOVU %VEC(0), (%rdi) + VMOVU %VEC(5), VEC_SIZE(%rdi) + VMOVU %VEC(6), (VEC_SIZE * 2)(%rdi) + VMOVU %VEC(7), (VEC_SIZE * 3)(%rdi) + /* Store the last VEC. */ +- VMOVU %VEC(8), (%r11) ++ VMOVU %VEC(8), -VEC_SIZE(%rdx, %rdi) ++ VZEROUPPER_RETURN ++ ++#if defined USE_MULTIARCH && IS_IN (libc) ++ /* L(skip_short_movsb_check) is only used with ERMS. Not for ++ FSRM. */ ++ .p2align 5,, 16 ++# if ALIGN_MOVSB ++L(skip_short_movsb_check): ++# if MOVSB_ALIGN_TO > VEC_SIZE ++ VMOVU VEC_SIZE(%rsi), %VEC(1) ++# endif ++# if MOVSB_ALIGN_TO > (VEC_SIZE * 2) ++# error Unsupported MOVSB_ALIGN_TO ++# endif ++ /* If CPU does not have FSRM two options for aligning. Align src ++ if dst and src 4k alias. Otherwise align dst. */ ++ testl $(PAGE_SIZE - 512), %ecx ++ jnz L(movsb_align_dst) ++ /* Fall through. dst and src 4k alias. It's better to align src ++ here because the bottleneck will be loads dues to the false ++ dependency on dst. */ ++ ++ /* rcx already has dst - src. */ ++ movq %rcx, %r9 ++ /* Add src to len. Subtract back after src aligned. -1 because ++ src is initially aligned to MOVSB_ALIGN_TO - 1. */ ++ leaq -1(%rsi, %rdx), %rcx ++ /* Inclusively align src to MOVSB_ALIGN_TO - 1. */ ++ orq $(MOVSB_ALIGN_TO - 1), %rsi ++ /* Restore dst and len adjusted with new values for aligned dst. ++ */ ++ leaq 1(%rsi, %r9), %rdi ++ subq %rsi, %rcx ++ /* Finish aligning src. */ ++ incq %rsi ++ ++ rep movsb ++ ++ VMOVU %VEC(0), (%r8) ++# if MOVSB_ALIGN_TO > VEC_SIZE ++ VMOVU %VEC(1), VEC_SIZE(%r8) ++# endif + VZEROUPPER_RETURN ++# endif ++ ++ .p2align 4,, 12 ++L(movsb): ++ movq %rdi, %rcx ++ subq %rsi, %rcx ++ /* Go to backwards temporal copy if overlap no matter what as ++ backward REP MOVSB is slow and we don't want to use NT stores if ++ there is overlap. */ ++ cmpq %rdx, %rcx ++ /* L(more_8x_vec_backward_check_nop) checks for src == dst. */ ++ jb L(more_8x_vec_backward_check_nop) ++# if ALIGN_MOVSB ++ /* Save dest for storing aligning VECs later. */ ++ movq %rdi, %r8 ++# endif ++ /* If above __x86_rep_movsb_stop_threshold most likely is ++ candidate for NT moves aswell. 
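On ERMS machines without FSRM the code aligns one side before `rep movsb`, and which side depends on 4k aliasing: when dst - src has bits 9..11 clear (both pointers in the same 4 KiB slice), the bottleneck is the loads because of the false dependency on the stores, so src is aligned; otherwise dst is aligned. A C sketch of the test; names are illustrative:

#include <stdint.h>

#define PAGE_SIZE 4096

/* Sketch of `testl $(PAGE_SIZE - 512), %ecx`: a zero result means 4k
   aliasing, so the source side gets aligned.  */
static int
align_src_for_movsb (uintptr_t dst, uintptr_t src)
{
  return ((dst - src) & (PAGE_SIZE - 512)) == 0;
}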
*/ ++ cmp __x86_rep_movsb_stop_threshold(%rip), %RDX_LP ++ jae L(large_memcpy_2x_check) ++# if AVOID_SHORT_DISTANCE_REP_MOVSB || ALIGN_MOVSB ++ /* Only avoid short movsb if CPU has FSRM. */ ++ testl $X86_STRING_CONTROL_AVOID_SHORT_DISTANCE_REP_MOVSB, __x86_string_control(%rip) ++ jz L(skip_short_movsb_check) ++# if AVOID_SHORT_DISTANCE_REP_MOVSB ++ /* Avoid "rep movsb" if RCX, the distance between source and ++ destination, is N*4GB + [1..63] with N >= 0. */ ++ ++ /* ecx contains dst - src. Early check for backward copy ++ conditions means only case of slow movsb with src = dst + [0, ++ 63] is ecx in [-63, 0]. Use unsigned comparison with -64 check ++ for that case. */ ++ cmpl $-64, %ecx ++ ja L(more_8x_vec_forward) ++# endif ++# endif ++# if ALIGN_MOVSB ++# if MOVSB_ALIGN_TO > VEC_SIZE ++ VMOVU VEC_SIZE(%rsi), %VEC(1) ++# endif ++# if MOVSB_ALIGN_TO > (VEC_SIZE * 2) ++# error Unsupported MOVSB_ALIGN_TO ++# endif ++ /* Fall through means cpu has FSRM. In that case exclusively ++ align destination. */ ++L(movsb_align_dst): ++ /* Subtract dst from src. Add back after dst aligned. */ ++ subq %rdi, %rsi ++ /* Exclusively align dst to MOVSB_ALIGN_TO (64). */ ++ addq $(MOVSB_ALIGN_TO - 1), %rdi ++ /* Add dst to len. Subtract back after dst aligned. */ ++ leaq (%r8, %rdx), %rcx ++ /* Finish aligning dst. */ ++ andq $-(MOVSB_ALIGN_TO), %rdi ++ /* Restore src and len adjusted with new values for aligned dst. ++ */ ++ addq %rdi, %rsi ++ subq %rdi, %rcx ++ ++ rep movsb ++ ++ /* Store VECs loaded for aligning. */ ++ VMOVU %VEC(0), (%r8) ++# if MOVSB_ALIGN_TO > VEC_SIZE ++ VMOVU %VEC(1), VEC_SIZE(%r8) ++# endif ++ VZEROUPPER_RETURN ++# else /* !ALIGN_MOVSB. */ ++L(skip_short_movsb_check): ++ mov %RDX_LP, %RCX_LP ++ rep movsb ++ ret ++# endif ++#endif + ++ .p2align 4,, 10 + #if (defined USE_MULTIARCH || VEC_SIZE == 16) && IS_IN (libc) +- .p2align 4 ++L(large_memcpy_2x_check): ++ cmp __x86_rep_movsb_threshold(%rip), %RDX_LP ++ jb L(more_8x_vec_check) + L(large_memcpy_2x): +- /* Compute absolute value of difference between source and +- destination. */ +- movq %rdi, %r9 +- subq %rsi, %r9 +- movq %r9, %r8 +- leaq -1(%r9), %rcx +- sarq $63, %r8 +- xorq %r8, %r9 +- subq %r8, %r9 +- /* Don't use non-temporal store if there is overlap between +- destination and source since destination may be in cache when +- source is loaded. */ +- cmpq %r9, %rdx +- ja L(more_8x_vec_check) ++ /* To reach this point it is impossible for dst > src and ++ overlap. Remaining to check is src > dst and overlap. rcx ++ already contains dst - src. Negate rcx to get src - dst. If ++ length > rcx then there is overlap and forward copy is best. */ ++ negq %rcx ++ cmpq %rcx, %rdx ++ ja L(more_8x_vec_forward) + + /* Cache align destination. First store the first 64 bytes then + adjust alignments. */ +- VMOVU (%rsi), %VEC(8) +-#if VEC_SIZE < 64 +- VMOVU VEC_SIZE(%rsi), %VEC(9) +-#if VEC_SIZE < 32 +- VMOVU (VEC_SIZE * 2)(%rsi), %VEC(10) +- VMOVU (VEC_SIZE * 3)(%rsi), %VEC(11) +-#endif +-#endif +- VMOVU %VEC(8), (%rdi) +-#if VEC_SIZE < 64 +- VMOVU %VEC(9), VEC_SIZE(%rdi) +-#if VEC_SIZE < 32 +- VMOVU %VEC(10), (VEC_SIZE * 2)(%rdi) +- VMOVU %VEC(11), (VEC_SIZE * 3)(%rdi) +-#endif +-#endif ++ ++ /* First vec was also loaded into VEC(0). 
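The FSRM path always aligns the destination instead: dst is rounded up to MOVSB_ALIGN_TO (64 bytes) with an add/and pair, src and the length are rebased, and the head vectors loaded before the alignment are stored at the original dst afterwards to cover the skipped bytes. A C sketch of the rounding; the helper name is illustrative:

#include <stdint.h>

/* Sketch of `addq $(MOVSB_ALIGN_TO - 1), %rdi;
   andq $-(MOVSB_ALIGN_TO), %rdi` for MOVSB_ALIGN_TO == 64.  */
static uintptr_t
round_up_to_64 (uintptr_t p)
{
  return (p + 63) & ~(uintptr_t) 63;
}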
*/ ++# if VEC_SIZE < 64 ++ VMOVU VEC_SIZE(%rsi), %VEC(1) ++# if VEC_SIZE < 32 ++ VMOVU (VEC_SIZE * 2)(%rsi), %VEC(2) ++ VMOVU (VEC_SIZE * 3)(%rsi), %VEC(3) ++# endif ++# endif ++ VMOVU %VEC(0), (%rdi) ++# if VEC_SIZE < 64 ++ VMOVU %VEC(1), VEC_SIZE(%rdi) ++# if VEC_SIZE < 32 ++ VMOVU %VEC(2), (VEC_SIZE * 2)(%rdi) ++ VMOVU %VEC(3), (VEC_SIZE * 3)(%rdi) ++# endif ++# endif ++ + /* Adjust source, destination, and size. */ + movq %rdi, %r8 + andq $63, %r8 +@@ -614,9 +767,13 @@ L(large_memcpy_2x): + /* Adjust length. */ + addq %r8, %rdx + +- /* Test if source and destination addresses will alias. If they do +- the larger pipeline in large_memcpy_4x alleviated the ++ /* Test if source and destination addresses will alias. If they ++ do the larger pipeline in large_memcpy_4x alleviated the + performance drop. */ ++ ++ /* ecx contains -(dst - src). not ecx will return dst - src - 1 ++ which works for testing aliasing. */ ++ notl %ecx + testl $(PAGE_SIZE - VEC_SIZE * 8), %ecx + jz L(large_memcpy_4x) + +@@ -704,8 +861,8 @@ L(loop_large_memcpy_4x_outer): + /* ecx stores inner loop counter. */ + movl $(PAGE_SIZE / LARGE_LOAD_SIZE), %ecx + L(loop_large_memcpy_4x_inner): +- /* Only one prefetch set per page as doing 4 pages give more time +- for prefetcher to keep up. */ ++ /* Only one prefetch set per page as doing 4 pages give more ++ time for prefetcher to keep up. */ + PREFETCH_ONE_SET(1, (%rsi), PREFETCHED_LOAD_SIZE) + PREFETCH_ONE_SET(1, (%rsi), PAGE_SIZE + PREFETCHED_LOAD_SIZE) + PREFETCH_ONE_SET(1, (%rsi), PAGE_SIZE * 2 + PREFETCHED_LOAD_SIZE) diff --git a/SOURCES/glibc-upstream-2.34-182.patch b/SOURCES/glibc-upstream-2.34-182.patch new file mode 100644 index 0000000..563ff9d --- /dev/null +++ b/SOURCES/glibc-upstream-2.34-182.patch @@ -0,0 +1,131 @@ +commit cecbac52123456e2fbcff062a4165bf7b9174797 +Author: Noah Goldstein +Date: Mon Nov 1 00:49:52 2021 -0500 + + x86: Double size of ERMS rep_movsb_threshold in dl-cacheinfo.h + + No bug. + + This patch doubles the rep_movsb_threshold when using ERMS. Based on + benchmarks the vector copy loop, especially now that it handles 4k + aliasing, is better for these medium ranged. + + On Skylake with ERMS: + + Size, Align1, Align2, dst>src,(rep movsb) / (vec copy) + 4096, 0, 0, 0, 0.975 + 4096, 0, 0, 1, 0.953 + 4096, 12, 0, 0, 0.969 + 4096, 12, 0, 1, 0.872 + 4096, 44, 0, 0, 0.979 + 4096, 44, 0, 1, 0.83 + 4096, 0, 12, 0, 1.006 + 4096, 0, 12, 1, 0.989 + 4096, 0, 44, 0, 0.739 + 4096, 0, 44, 1, 0.942 + 4096, 12, 12, 0, 1.009 + 4096, 12, 12, 1, 0.973 + 4096, 44, 44, 0, 0.791 + 4096, 44, 44, 1, 0.961 + 4096, 2048, 0, 0, 0.978 + 4096, 2048, 0, 1, 0.951 + 4096, 2060, 0, 0, 0.986 + 4096, 2060, 0, 1, 0.963 + 4096, 2048, 12, 0, 0.971 + 4096, 2048, 12, 1, 0.941 + 4096, 2060, 12, 0, 0.977 + 4096, 2060, 12, 1, 0.949 + 8192, 0, 0, 0, 0.85 + 8192, 0, 0, 1, 0.845 + 8192, 13, 0, 0, 0.937 + 8192, 13, 0, 1, 0.939 + 8192, 45, 0, 0, 0.932 + 8192, 45, 0, 1, 0.927 + 8192, 0, 13, 0, 0.621 + 8192, 0, 13, 1, 0.62 + 8192, 0, 45, 0, 0.53 + 8192, 0, 45, 1, 0.516 + 8192, 13, 13, 0, 0.664 + 8192, 13, 13, 1, 0.659 + 8192, 45, 45, 0, 0.593 + 8192, 45, 45, 1, 0.575 + 8192, 2048, 0, 0, 0.854 + 8192, 2048, 0, 1, 0.834 + 8192, 2061, 0, 0, 0.863 + 8192, 2061, 0, 1, 0.857 + 8192, 2048, 13, 0, 0.63 + 8192, 2048, 13, 1, 0.629 + 8192, 2061, 13, 0, 0.627 + 8192, 2061, 13, 1, 0.62 + + Signed-off-by: Noah Goldstein + Reviewed-by: H.J. 
Lu + (cherry picked from commit 475b63702ef38b69558fc3d31a0b66776a70f1d3) + +diff --git a/sysdeps/x86/dl-cacheinfo.h b/sysdeps/x86/dl-cacheinfo.h +index e6c94dfd023a25dc..2e43e67e4f4037d3 100644 +--- a/sysdeps/x86/dl-cacheinfo.h ++++ b/sysdeps/x86/dl-cacheinfo.h +@@ -866,12 +866,14 @@ dl_init_cacheinfo (struct cpu_features *cpu_features) + /* NB: The REP MOVSB threshold must be greater than VEC_SIZE * 8. */ + unsigned int minimum_rep_movsb_threshold; + #endif +- /* NB: The default REP MOVSB threshold is 2048 * (VEC_SIZE / 16). */ ++ /* NB: The default REP MOVSB threshold is 4096 * (VEC_SIZE / 16) for ++ VEC_SIZE == 64 or 32. For VEC_SIZE == 16, the default REP MOVSB ++ threshold is 2048 * (VEC_SIZE / 16). */ + unsigned int rep_movsb_threshold; + if (CPU_FEATURE_USABLE_P (cpu_features, AVX512F) + && !CPU_FEATURE_PREFERRED_P (cpu_features, Prefer_No_AVX512)) + { +- rep_movsb_threshold = 2048 * (64 / 16); ++ rep_movsb_threshold = 4096 * (64 / 16); + #if HAVE_TUNABLES + minimum_rep_movsb_threshold = 64 * 8; + #endif +@@ -879,7 +881,7 @@ dl_init_cacheinfo (struct cpu_features *cpu_features) + else if (CPU_FEATURE_PREFERRED_P (cpu_features, + AVX_Fast_Unaligned_Load)) + { +- rep_movsb_threshold = 2048 * (32 / 16); ++ rep_movsb_threshold = 4096 * (32 / 16); + #if HAVE_TUNABLES + minimum_rep_movsb_threshold = 32 * 8; + #endif +diff --git a/sysdeps/x86/dl-tunables.list b/sysdeps/x86/dl-tunables.list +index dd6e1d65c9490d4f..419313804d49cf65 100644 +--- a/sysdeps/x86/dl-tunables.list ++++ b/sysdeps/x86/dl-tunables.list +@@ -32,17 +32,21 @@ glibc { + } + x86_rep_movsb_threshold { + type: SIZE_T +- # Since there is overhead to set up REP MOVSB operation, REP MOVSB +- # isn't faster on short data. The memcpy micro benchmark in glibc +- # shows that 2KB is the approximate value above which REP MOVSB +- # becomes faster than SSE2 optimization on processors with Enhanced +- # REP MOVSB. Since larger register size can move more data with a +- # single load and store, the threshold is higher with larger register +- # size. Note: Since the REP MOVSB threshold must be greater than 8 +- # times of vector size and the default value is 2048 * (vector size +- # / 16), the default value and the minimum value must be updated at +- # run-time. NB: Don't set the default value since we can't tell if +- # the tunable value is set by user or not [BZ #27069]. ++ # Since there is overhead to set up REP MOVSB operation, REP ++ # MOVSB isn't faster on short data. The memcpy micro benchmark ++ # in glibc shows that 2KB is the approximate value above which ++ # REP MOVSB becomes faster than SSE2 optimization on processors ++ # with Enhanced REP MOVSB. Since larger register size can move ++ # more data with a single load and store, the threshold is ++ # higher with larger register size. Micro benchmarks show AVX ++ # REP MOVSB becomes faster apprximately at 8KB. The AVX512 ++ # threshold is extrapolated to 16KB. For machines with FSRM the ++ # threshold is universally set at 2112 bytes. Note: Since the ++ # REP MOVSB threshold must be greater than 8 times of vector ++ # size and the default value is 4096 * (vector size / 16), the ++ # default value and the minimum value must be updated at ++ # run-time. NB: Don't set the default value since we can't tell ++ # if the tunable value is set by user or not [BZ #27069]. 
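The effect of this hunk on the computed defaults can be summarized in one expression; the helper below is a sketch, not glibc code, with the vector size standing in for the selected ISA level (64 for AVX-512, 32 for AVX, 16 for SSE2):

/* Sketch of the post-patch default REP MOVSB thresholds:
   16 KiB (AVX-512), 8 KiB (AVX), and the unchanged 2 KiB (SSE2).  */
static unsigned int
default_rep_movsb_threshold (unsigned int vec_size)
{
  return (vec_size >= 32 ? 4096 : 2048) * (vec_size / 16);
}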
+ minval: 1 + } + x86_rep_stosb_threshold { diff --git a/SOURCES/glibc-upstream-2.34-183.patch b/SOURCES/glibc-upstream-2.34-183.patch new file mode 100644 index 0000000..a1a7285 --- /dev/null +++ b/SOURCES/glibc-upstream-2.34-183.patch @@ -0,0 +1,2423 @@ +commit 7cb126e7e7febf9dc3e369cc3e4885e34fb9433b +Author: Noah Goldstein +Date: Wed Nov 10 16:18:56 2021 -0600 + + x86: Shrink memcmp-sse4.S code size + + No bug. + + This implementation refactors memcmp-sse4.S primarily with minimizing + code size in mind. It does this by removing the lookup table logic and + removing the unrolled check from (256, 512] bytes. + + memcmp-sse4 code size reduction : -3487 bytes + wmemcmp-sse4 code size reduction: -1472 bytes + + The current memcmp-sse4.S implementation has a large code size + cost. This has serious adverse affects on the ICache / ITLB. While + in micro-benchmarks the implementations appears fast, traces of + real-world code have shown that the speed in micro benchmarks does not + translate when the ICache/ITLB are not primed, and that the cost + of the code size has measurable negative affects on overall + application performance. + + See https://research.google/pubs/pub48320/ for more details. + + Signed-off-by: Noah Goldstein + Reviewed-by: H.J. Lu + (cherry picked from commit 2f9062d7171850451e6044ef78d91ff8c017b9c0) + +diff --git a/sysdeps/x86_64/multiarch/memcmp-sse4.S b/sysdeps/x86_64/multiarch/memcmp-sse4.S +index b7ac034569ec6178..97c102a9c5ab2b91 100644 +--- a/sysdeps/x86_64/multiarch/memcmp-sse4.S ++++ b/sysdeps/x86_64/multiarch/memcmp-sse4.S +@@ -25,14 +25,14 @@ + # define MEMCMP __memcmp_sse4_1 + # endif + +-# define JMPTBL(I, B) (I - B) ++#ifdef USE_AS_WMEMCMP ++# define CMPEQ pcmpeqd ++# define CHAR_SIZE 4 ++#else ++# define CMPEQ pcmpeqb ++# define CHAR_SIZE 1 ++#endif + +-# define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE) \ +- lea TABLE(%rip), %r11; \ +- movslq (%r11, INDEX, SCALE), %rcx; \ +- add %r11, %rcx; \ +- _CET_NOTRACK jmp *%rcx; \ +- ud2 + + /* Warning! + wmemcmp has to use SIGNED comparison for elements. +@@ -47,33 +47,253 @@ ENTRY (MEMCMP) + /* Clear the upper 32 bits. */ + mov %edx, %edx + # endif +- pxor %xmm0, %xmm0 + cmp $79, %RDX_LP + ja L(79bytesormore) ++ ++ cmp $CHAR_SIZE, %RDX_LP ++ jbe L(firstbyte) ++ ++ /* N in (CHAR_SIZE, 79) bytes. 
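The jump table is replaced by a short size dispatch; for 4..7 bytes the code that follows fuses two overlapping big-endian dwords from each buffer into one 64-bit value, so a single subtract produces memcmp ordering. A C sketch of that trick (the helper name and the use of __builtin_bswap32 are illustrative, assuming GCC/Clang):

#include <string.h>
#include <stdint.h>
#include <stddef.h>

/* Sketch of the bswap-based 4..7 byte compare: the head dword goes
   into the high half, the (overlapping) tail dword into the low
   half; overlap bytes are compared twice but consistently, so the
   ordering matches a byte-wise memcmp.  */
static int
memcmp_4_to_7 (const unsigned char *a, const unsigned char *b, size_t n)
{
  uint32_t a0, a1, b0, b1;
  memcpy (&a0, a, 4);
  memcpy (&b0, b, 4);
  memcpy (&a1, a + n - 4, 4);
  memcpy (&b1, b + n - 4, 4);
  uint64_t va = ((uint64_t) __builtin_bswap32 (a0) << 32)
                | __builtin_bswap32 (a1);
  uint64_t vb = ((uint64_t) __builtin_bswap32 (b0) << 32)
                | __builtin_bswap32 (b1);
  return (va > vb) - (va < vb);
}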
*/ ++ cmpl $32, %edx ++ ja L(more_32_bytes) ++ ++ cmpl $16, %edx ++ jae L(16_to_32_bytes) ++ + # ifndef USE_AS_WMEMCMP +- cmp $1, %RDX_LP +- je L(firstbyte) ++ cmpl $8, %edx ++ jae L(8_to_16_bytes) ++ ++ cmpl $4, %edx ++ jb L(2_to_3_bytes) ++ ++ movl (%rdi), %eax ++ movl (%rsi), %ecx ++ ++ bswap %eax ++ bswap %ecx ++ ++ shlq $32, %rax ++ shlq $32, %rcx ++ ++ movl -4(%rdi, %rdx), %edi ++ movl -4(%rsi, %rdx), %esi ++ ++ bswap %edi ++ bswap %esi ++ ++ orq %rdi, %rax ++ orq %rsi, %rcx ++ subq %rcx, %rax ++ cmovne %edx, %eax ++ sbbl %ecx, %ecx ++ orl %ecx, %eax ++ ret ++ ++ .p2align 4,, 8 ++L(2_to_3_bytes): ++ movzwl (%rdi), %eax ++ movzwl (%rsi), %ecx ++ shll $8, %eax ++ shll $8, %ecx ++ bswap %eax ++ bswap %ecx ++ movzbl -1(%rdi, %rdx), %edi ++ movzbl -1(%rsi, %rdx), %esi ++ orl %edi, %eax ++ orl %esi, %ecx ++ subl %ecx, %eax ++ ret ++ ++ .p2align 4,, 8 ++L(8_to_16_bytes): ++ movq (%rdi), %rax ++ movq (%rsi), %rcx ++ ++ bswap %rax ++ bswap %rcx ++ ++ subq %rcx, %rax ++ jne L(8_to_16_bytes_done) ++ ++ movq -8(%rdi, %rdx), %rax ++ movq -8(%rsi, %rdx), %rcx ++ ++ bswap %rax ++ bswap %rcx ++ ++ subq %rcx, %rax ++ ++L(8_to_16_bytes_done): ++ cmovne %edx, %eax ++ sbbl %ecx, %ecx ++ orl %ecx, %eax ++ ret ++# else ++ xorl %eax, %eax ++ movl (%rdi), %ecx ++ cmpl (%rsi), %ecx ++ jne L(8_to_16_bytes_done) ++ movl 4(%rdi), %ecx ++ cmpl 4(%rsi), %ecx ++ jne L(8_to_16_bytes_done) ++ movl -4(%rdi, %rdx), %ecx ++ cmpl -4(%rsi, %rdx), %ecx ++ jne L(8_to_16_bytes_done) ++ ret + # endif +- add %rdx, %rsi +- add %rdx, %rdi +- BRANCH_TO_JMPTBL_ENTRY(L(table_64bytes), %rdx, 4) + +-# ifndef USE_AS_WMEMCMP +- .p2align 4 ++ .p2align 4,, 3 ++L(ret_zero): ++ xorl %eax, %eax ++L(zero): ++ ret ++ ++ .p2align 4,, 8 + L(firstbyte): ++ jb L(ret_zero) ++# ifdef USE_AS_WMEMCMP ++ xorl %eax, %eax ++ movl (%rdi), %ecx ++ cmpl (%rsi), %ecx ++ je L(zero) ++L(8_to_16_bytes_done): ++ setg %al ++ leal -1(%rax, %rax), %eax ++# else + movzbl (%rdi), %eax + movzbl (%rsi), %ecx + sub %ecx, %eax ++# endif + ret ++ ++ .p2align 4 ++L(vec_return_begin_48): ++ addq $16, %rdi ++ addq $16, %rsi ++L(vec_return_begin_32): ++ bsfl %eax, %eax ++# ifdef USE_AS_WMEMCMP ++ movl 32(%rdi, %rax), %ecx ++ xorl %edx, %edx ++ cmpl 32(%rsi, %rax), %ecx ++ setg %dl ++ leal -1(%rdx, %rdx), %eax ++# else ++ movzbl 32(%rsi, %rax), %ecx ++ movzbl 32(%rdi, %rax), %eax ++ subl %ecx, %eax ++# endif ++ ret ++ ++ .p2align 4 ++L(vec_return_begin_16): ++ addq $16, %rdi ++ addq $16, %rsi ++L(vec_return_begin): ++ bsfl %eax, %eax ++# ifdef USE_AS_WMEMCMP ++ movl (%rdi, %rax), %ecx ++ xorl %edx, %edx ++ cmpl (%rsi, %rax), %ecx ++ setg %dl ++ leal -1(%rdx, %rdx), %eax ++# else ++ movzbl (%rsi, %rax), %ecx ++ movzbl (%rdi, %rax), %eax ++ subl %ecx, %eax ++# endif ++ ret ++ ++ .p2align 4 ++L(vec_return_end_16): ++ subl $16, %edx ++L(vec_return_end): ++ bsfl %eax, %eax ++ addl %edx, %eax ++# ifdef USE_AS_WMEMCMP ++ movl -16(%rdi, %rax), %ecx ++ xorl %edx, %edx ++ cmpl -16(%rsi, %rax), %ecx ++ setg %dl ++ leal -1(%rdx, %rdx), %eax ++# else ++ movzbl -16(%rsi, %rax), %ecx ++ movzbl -16(%rdi, %rax), %eax ++ subl %ecx, %eax + # endif ++ ret ++ ++ .p2align 4,, 8 ++L(more_32_bytes): ++ movdqu (%rdi), %xmm0 ++ movdqu (%rsi), %xmm1 ++ CMPEQ %xmm0, %xmm1 ++ pmovmskb %xmm1, %eax ++ incw %ax ++ jnz L(vec_return_begin) ++ ++ movdqu 16(%rdi), %xmm0 ++ movdqu 16(%rsi), %xmm1 ++ CMPEQ %xmm0, %xmm1 ++ pmovmskb %xmm1, %eax ++ incw %ax ++ jnz L(vec_return_begin_16) ++ ++ cmpl $64, %edx ++ jbe L(32_to_64_bytes) ++ movdqu 32(%rdi), %xmm0 ++ movdqu 32(%rsi), %xmm1 ++ CMPEQ %xmm0, %xmm1 ++ 
pmovmskb %xmm1, %eax ++ incw %ax ++ jnz L(vec_return_begin_32) ++ ++ .p2align 4,, 6 ++L(32_to_64_bytes): ++ movdqu -32(%rdi, %rdx), %xmm0 ++ movdqu -32(%rsi, %rdx), %xmm1 ++ CMPEQ %xmm0, %xmm1 ++ pmovmskb %xmm1, %eax ++ incw %ax ++ jnz L(vec_return_end_16) ++ ++ movdqu -16(%rdi, %rdx), %xmm0 ++ movdqu -16(%rsi, %rdx), %xmm1 ++ CMPEQ %xmm0, %xmm1 ++ pmovmskb %xmm1, %eax ++ incw %ax ++ jnz L(vec_return_end) ++ ret ++ ++ .p2align 4 ++L(16_to_32_bytes): ++ movdqu (%rdi), %xmm0 ++ movdqu (%rsi), %xmm1 ++ CMPEQ %xmm0, %xmm1 ++ pmovmskb %xmm1, %eax ++ incw %ax ++ jnz L(vec_return_begin) ++ ++ movdqu -16(%rdi, %rdx), %xmm0 ++ movdqu -16(%rsi, %rdx), %xmm1 ++ CMPEQ %xmm0, %xmm1 ++ pmovmskb %xmm1, %eax ++ incw %ax ++ jnz L(vec_return_end) ++ ret ++ + + .p2align 4 + L(79bytesormore): ++ movdqu (%rdi), %xmm0 + movdqu (%rsi), %xmm1 +- movdqu (%rdi), %xmm2 +- pxor %xmm1, %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(16bytesin256) ++ CMPEQ %xmm0, %xmm1 ++ pmovmskb %xmm1, %eax ++ incw %ax ++ jnz L(vec_return_begin) ++ ++ + mov %rsi, %rcx + and $-16, %rsi + add $16, %rsi +@@ -86,1694 +306,499 @@ L(79bytesormore): + + cmp $128, %rdx + ja L(128bytesormore) +-L(less128bytes): +- sub $64, %rdx +- +- movdqu (%rdi), %xmm2 +- pxor (%rsi), %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(16bytesin256) + +- movdqu 16(%rdi), %xmm2 +- pxor 16(%rsi), %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(32bytesin256) +- +- movdqu 32(%rdi), %xmm2 +- pxor 32(%rsi), %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(48bytesin256) +- +- movdqu 48(%rdi), %xmm2 +- pxor 48(%rsi), %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(64bytesin256) +- cmp $32, %rdx +- jb L(less32bytesin64) +- +- movdqu 64(%rdi), %xmm2 +- pxor 64(%rsi), %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(80bytesin256) +- +- movdqu 80(%rdi), %xmm2 +- pxor 80(%rsi), %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(96bytesin256) +- sub $32, %rdx +- add $32, %rdi +- add $32, %rsi +-L(less32bytesin64): +- add $64, %rdi +- add $64, %rsi +- add %rdx, %rsi +- add %rdx, %rdi +- BRANCH_TO_JMPTBL_ENTRY(L(table_64bytes), %rdx, 4) ++ .p2align 4,, 6 ++L(less128bytes): ++ movdqu (%rdi), %xmm1 ++ CMPEQ (%rsi), %xmm1 ++ pmovmskb %xmm1, %eax ++ incw %ax ++ jnz L(vec_return_begin) ++ ++ movdqu 16(%rdi), %xmm1 ++ CMPEQ 16(%rsi), %xmm1 ++ pmovmskb %xmm1, %eax ++ incw %ax ++ jnz L(vec_return_begin_16) ++ ++ movdqu 32(%rdi), %xmm1 ++ CMPEQ 32(%rsi), %xmm1 ++ pmovmskb %xmm1, %eax ++ incw %ax ++ jnz L(vec_return_begin_32) ++ ++ movdqu 48(%rdi), %xmm1 ++ CMPEQ 48(%rsi), %xmm1 ++ pmovmskb %xmm1, %eax ++ incw %ax ++ jnz L(vec_return_begin_48) ++ ++ cmp $96, %rdx ++ jb L(32_to_64_bytes) ++ ++ addq $64, %rdi ++ addq $64, %rsi ++ subq $64, %rdx ++ ++ .p2align 4,, 6 ++L(last_64_bytes): ++ movdqu (%rdi), %xmm1 ++ CMPEQ (%rsi), %xmm1 ++ pmovmskb %xmm1, %eax ++ incw %ax ++ jnz L(vec_return_begin) ++ ++ movdqu 16(%rdi), %xmm1 ++ CMPEQ 16(%rsi), %xmm1 ++ pmovmskb %xmm1, %eax ++ incw %ax ++ jnz L(vec_return_begin_16) ++ ++ movdqu -32(%rdi, %rdx), %xmm0 ++ movdqu -32(%rsi, %rdx), %xmm1 ++ CMPEQ %xmm0, %xmm1 ++ pmovmskb %xmm1, %eax ++ incw %ax ++ jnz L(vec_return_end_16) ++ ++ movdqu -16(%rdi, %rdx), %xmm0 ++ movdqu -16(%rsi, %rdx), %xmm1 ++ CMPEQ %xmm0, %xmm1 ++ pmovmskb %xmm1, %eax ++ incw %ax ++ jnz L(vec_return_end) ++ ret + ++ .p2align 4 + L(128bytesormore): +- cmp $512, %rdx +- ja L(512bytesormore) + cmp $256, %rdx +- ja L(less512bytes) ++ ja L(unaligned_loop) + L(less256bytes): +- sub $128, %rdx +- +- movdqu (%rdi), %xmm2 +- pxor (%rsi), %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(16bytesin256) +- +- movdqu 16(%rdi), %xmm2 +- pxor 16(%rsi), %xmm2 +- ptest %xmm2, %xmm0 +- jnc 
L(32bytesin256) +- +- movdqu 32(%rdi), %xmm2 +- pxor 32(%rsi), %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(48bytesin256) +- +- movdqu 48(%rdi), %xmm2 +- pxor 48(%rsi), %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(64bytesin256) +- +- movdqu 64(%rdi), %xmm2 +- pxor 64(%rsi), %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(80bytesin256) +- +- movdqu 80(%rdi), %xmm2 +- pxor 80(%rsi), %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(96bytesin256) +- +- movdqu 96(%rdi), %xmm2 +- pxor 96(%rsi), %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(112bytesin256) +- +- movdqu 112(%rdi), %xmm2 +- pxor 112(%rsi), %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(128bytesin256) +- +- add $128, %rsi +- add $128, %rdi +- +- cmp $64, %rdx +- jae L(less128bytes) +- +- cmp $32, %rdx +- jb L(less32bytesin128) +- +- movdqu (%rdi), %xmm2 +- pxor (%rsi), %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(16bytesin256) +- +- movdqu 16(%rdi), %xmm2 +- pxor 16(%rsi), %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(32bytesin256) +- sub $32, %rdx +- add $32, %rdi +- add $32, %rsi +-L(less32bytesin128): +- add %rdx, %rsi +- add %rdx, %rdi +- BRANCH_TO_JMPTBL_ENTRY(L(table_64bytes), %rdx, 4) +- +-L(less512bytes): +- sub $256, %rdx +- movdqu (%rdi), %xmm2 +- pxor (%rsi), %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(16bytesin256) +- +- movdqu 16(%rdi), %xmm2 +- pxor 16(%rsi), %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(32bytesin256) +- +- movdqu 32(%rdi), %xmm2 +- pxor 32(%rsi), %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(48bytesin256) +- +- movdqu 48(%rdi), %xmm2 +- pxor 48(%rsi), %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(64bytesin256) +- +- movdqu 64(%rdi), %xmm2 +- pxor 64(%rsi), %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(80bytesin256) +- +- movdqu 80(%rdi), %xmm2 +- pxor 80(%rsi), %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(96bytesin256) +- +- movdqu 96(%rdi), %xmm2 +- pxor 96(%rsi), %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(112bytesin256) +- +- movdqu 112(%rdi), %xmm2 +- pxor 112(%rsi), %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(128bytesin256) +- +- movdqu 128(%rdi), %xmm2 +- pxor 128(%rsi), %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(144bytesin256) +- +- movdqu 144(%rdi), %xmm2 +- pxor 144(%rsi), %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(160bytesin256) +- +- movdqu 160(%rdi), %xmm2 +- pxor 160(%rsi), %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(176bytesin256) +- +- movdqu 176(%rdi), %xmm2 +- pxor 176(%rsi), %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(192bytesin256) +- +- movdqu 192(%rdi), %xmm2 +- pxor 192(%rsi), %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(208bytesin256) +- +- movdqu 208(%rdi), %xmm2 +- pxor 208(%rsi), %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(224bytesin256) +- +- movdqu 224(%rdi), %xmm2 +- pxor 224(%rsi), %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(240bytesin256) +- +- movdqu 240(%rdi), %xmm2 +- pxor 240(%rsi), %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(256bytesin256) +- +- add $256, %rsi +- add $256, %rdi +- +- cmp $128, %rdx +- jae L(less256bytes) ++ movdqu (%rdi), %xmm1 ++ CMPEQ (%rsi), %xmm1 ++ pmovmskb %xmm1, %eax ++ incw %ax ++ jnz L(vec_return_begin) ++ ++ movdqu 16(%rdi), %xmm1 ++ CMPEQ 16(%rsi), %xmm1 ++ pmovmskb %xmm1, %eax ++ incw %ax ++ jnz L(vec_return_begin_16) ++ ++ movdqu 32(%rdi), %xmm1 ++ CMPEQ 32(%rsi), %xmm1 ++ pmovmskb %xmm1, %eax ++ incw %ax ++ jnz L(vec_return_begin_32) ++ ++ movdqu 48(%rdi), %xmm1 ++ CMPEQ 48(%rsi), %xmm1 ++ pmovmskb %xmm1, %eax ++ incw %ax ++ jnz L(vec_return_begin_48) ++ ++ addq $64, %rdi ++ addq $64, %rsi ++ ++ movdqu (%rdi), %xmm1 ++ CMPEQ (%rsi), %xmm1 ++ pmovmskb %xmm1, %eax ++ incw %ax ++ jnz L(vec_return_begin) ++ ++ movdqu 16(%rdi), %xmm1 ++ CMPEQ 16(%rsi), %xmm1 ++ pmovmskb %xmm1, %eax ++ incw %ax ++ jnz L(vec_return_begin_16) ++ ++ 
movdqu 32(%rdi), %xmm1 ++ CMPEQ 32(%rsi), %xmm1 ++ pmovmskb %xmm1, %eax ++ incw %ax ++ jnz L(vec_return_begin_32) ++ ++ movdqu 48(%rdi), %xmm1 ++ CMPEQ 48(%rsi), %xmm1 ++ pmovmskb %xmm1, %eax ++ incw %ax ++ jnz L(vec_return_begin_48) ++ ++ addq $-128, %rdx ++ subq $-64, %rsi ++ subq $-64, %rdi + + cmp $64, %rdx +- jae L(less128bytes) ++ ja L(less128bytes) + + cmp $32, %rdx +- jb L(less32bytesin256) +- +- movdqu (%rdi), %xmm2 +- pxor (%rsi), %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(16bytesin256) +- +- movdqu 16(%rdi), %xmm2 +- pxor 16(%rsi), %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(32bytesin256) +- sub $32, %rdx +- add $32, %rdi +- add $32, %rsi +-L(less32bytesin256): +- add %rdx, %rsi +- add %rdx, %rdi +- BRANCH_TO_JMPTBL_ENTRY(L(table_64bytes), %rdx, 4) ++ ja L(last_64_bytes) ++ ++ movdqu -32(%rdi, %rdx), %xmm0 ++ movdqu -32(%rsi, %rdx), %xmm1 ++ CMPEQ %xmm0, %xmm1 ++ pmovmskb %xmm1, %eax ++ incw %ax ++ jnz L(vec_return_end_16) ++ ++ movdqu -16(%rdi, %rdx), %xmm0 ++ movdqu -16(%rsi, %rdx), %xmm1 ++ CMPEQ %xmm0, %xmm1 ++ pmovmskb %xmm1, %eax ++ incw %ax ++ jnz L(vec_return_end) ++ ret + + .p2align 4 +-L(512bytesormore): ++L(unaligned_loop): + # ifdef DATA_CACHE_SIZE_HALF + mov $DATA_CACHE_SIZE_HALF, %R8_LP + # else + mov __x86_data_cache_size_half(%rip), %R8_LP + # endif +- mov %r8, %r9 +- shr $1, %r8 +- add %r9, %r8 +- cmp %r8, %rdx +- ja L(L2_L3_cache_unaglined) ++ movq %r8, %r9 ++ addq %r8, %r8 ++ addq %r9, %r8 ++ cmpq %r8, %rdx ++ ja L(L2_L3_cache_unaligned) + sub $64, %rdx + .p2align 4 + L(64bytesormore_loop): +- movdqu (%rdi), %xmm2 +- pxor (%rsi), %xmm2 +- movdqa %xmm2, %xmm1 ++ movdqu (%rdi), %xmm0 ++ movdqu 16(%rdi), %xmm1 ++ movdqu 32(%rdi), %xmm2 ++ movdqu 48(%rdi), %xmm3 + +- movdqu 16(%rdi), %xmm3 +- pxor 16(%rsi), %xmm3 +- por %xmm3, %xmm1 ++ CMPEQ (%rsi), %xmm0 ++ CMPEQ 16(%rsi), %xmm1 ++ CMPEQ 32(%rsi), %xmm2 ++ CMPEQ 48(%rsi), %xmm3 + +- movdqu 32(%rdi), %xmm4 +- pxor 32(%rsi), %xmm4 +- por %xmm4, %xmm1 ++ pand %xmm0, %xmm1 ++ pand %xmm2, %xmm3 ++ pand %xmm1, %xmm3 + +- movdqu 48(%rdi), %xmm5 +- pxor 48(%rsi), %xmm5 +- por %xmm5, %xmm1 ++ pmovmskb %xmm3, %eax ++ incw %ax ++ jnz L(64bytesormore_loop_end) + +- ptest %xmm1, %xmm0 +- jnc L(64bytesormore_loop_end) + add $64, %rsi + add $64, %rdi + sub $64, %rdx +- jae L(64bytesormore_loop) ++ ja L(64bytesormore_loop) + +- add $64, %rdx +- add %rdx, %rsi +- add %rdx, %rdi +- BRANCH_TO_JMPTBL_ENTRY(L(table_64bytes), %rdx, 4) ++ .p2align 4,, 6 ++L(loop_tail): ++ addq %rdx, %rdi ++ movdqu (%rdi), %xmm0 ++ movdqu 16(%rdi), %xmm1 ++ movdqu 32(%rdi), %xmm2 ++ movdqu 48(%rdi), %xmm3 ++ ++ addq %rdx, %rsi ++ movdqu (%rsi), %xmm4 ++ movdqu 16(%rsi), %xmm5 ++ movdqu 32(%rsi), %xmm6 ++ movdqu 48(%rsi), %xmm7 ++ ++ CMPEQ %xmm4, %xmm0 ++ CMPEQ %xmm5, %xmm1 ++ CMPEQ %xmm6, %xmm2 ++ CMPEQ %xmm7, %xmm3 ++ ++ pand %xmm0, %xmm1 ++ pand %xmm2, %xmm3 ++ pand %xmm1, %xmm3 ++ ++ pmovmskb %xmm3, %eax ++ incw %ax ++ jnz L(64bytesormore_loop_end) ++ ret + +-L(L2_L3_cache_unaglined): +- sub $64, %rdx ++L(L2_L3_cache_unaligned): ++ subq $64, %rdx + .p2align 4 + L(L2_L3_unaligned_128bytes_loop): + prefetchnta 0x1c0(%rdi) + prefetchnta 0x1c0(%rsi) +- movdqu (%rdi), %xmm2 +- pxor (%rsi), %xmm2 +- movdqa %xmm2, %xmm1 + +- movdqu 16(%rdi), %xmm3 +- pxor 16(%rsi), %xmm3 +- por %xmm3, %xmm1 ++ movdqu (%rdi), %xmm0 ++ movdqu 16(%rdi), %xmm1 ++ movdqu 32(%rdi), %xmm2 ++ movdqu 48(%rdi), %xmm3 ++ ++ CMPEQ (%rsi), %xmm0 ++ CMPEQ 16(%rsi), %xmm1 ++ CMPEQ 32(%rsi), %xmm2 ++ CMPEQ 48(%rsi), %xmm3 + +- movdqu 32(%rdi), %xmm4 +- pxor 32(%rsi), %xmm4 +- por %xmm4, %xmm1 ++ pand 
%xmm0, %xmm1 ++ pand %xmm2, %xmm3 ++ pand %xmm1, %xmm3 + +- movdqu 48(%rdi), %xmm5 +- pxor 48(%rsi), %xmm5 +- por %xmm5, %xmm1 ++ pmovmskb %xmm3, %eax ++ incw %ax ++ jnz L(64bytesormore_loop_end) + +- ptest %xmm1, %xmm0 +- jnc L(64bytesormore_loop_end) + add $64, %rsi + add $64, %rdi + sub $64, %rdx +- jae L(L2_L3_unaligned_128bytes_loop) ++ ja L(L2_L3_unaligned_128bytes_loop) ++ jmp L(loop_tail) + +- add $64, %rdx +- add %rdx, %rsi +- add %rdx, %rdi +- BRANCH_TO_JMPTBL_ENTRY(L(table_64bytes), %rdx, 4) + +-/* +- * This case is for machines which are sensitive for unaligned instructions. +- */ ++ /* This case is for machines which are sensitive for unaligned ++ * instructions. */ + .p2align 4 + L(2aligned): + cmp $128, %rdx + ja L(128bytesormorein2aligned) + L(less128bytesin2aligned): +- sub $64, %rdx +- +- movdqa (%rdi), %xmm2 +- pxor (%rsi), %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(16bytesin256) +- +- movdqa 16(%rdi), %xmm2 +- pxor 16(%rsi), %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(32bytesin256) +- +- movdqa 32(%rdi), %xmm2 +- pxor 32(%rsi), %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(48bytesin256) +- +- movdqa 48(%rdi), %xmm2 +- pxor 48(%rsi), %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(64bytesin256) +- cmp $32, %rdx +- jb L(less32bytesin64in2alinged) +- +- movdqa 64(%rdi), %xmm2 +- pxor 64(%rsi), %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(80bytesin256) +- +- movdqa 80(%rdi), %xmm2 +- pxor 80(%rsi), %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(96bytesin256) +- sub $32, %rdx +- add $32, %rdi +- add $32, %rsi +-L(less32bytesin64in2alinged): +- add $64, %rdi +- add $64, %rsi +- add %rdx, %rsi +- add %rdx, %rdi +- BRANCH_TO_JMPTBL_ENTRY(L(table_64bytes), %rdx, 4) ++ movdqa (%rdi), %xmm1 ++ CMPEQ (%rsi), %xmm1 ++ pmovmskb %xmm1, %eax ++ incw %ax ++ jnz L(vec_return_begin) ++ ++ movdqa 16(%rdi), %xmm1 ++ CMPEQ 16(%rsi), %xmm1 ++ pmovmskb %xmm1, %eax ++ incw %ax ++ jnz L(vec_return_begin_16) ++ ++ movdqa 32(%rdi), %xmm1 ++ CMPEQ 32(%rsi), %xmm1 ++ pmovmskb %xmm1, %eax ++ incw %ax ++ jnz L(vec_return_begin_32) ++ ++ movdqa 48(%rdi), %xmm1 ++ CMPEQ 48(%rsi), %xmm1 ++ pmovmskb %xmm1, %eax ++ incw %ax ++ jnz L(vec_return_begin_48) ++ ++ cmp $96, %rdx ++ jb L(32_to_64_bytes) ++ ++ addq $64, %rdi ++ addq $64, %rsi ++ subq $64, %rdx ++ ++ .p2align 4,, 6 ++L(aligned_last_64_bytes): ++ movdqa (%rdi), %xmm1 ++ CMPEQ (%rsi), %xmm1 ++ pmovmskb %xmm1, %eax ++ incw %ax ++ jnz L(vec_return_begin) ++ ++ movdqa 16(%rdi), %xmm1 ++ CMPEQ 16(%rsi), %xmm1 ++ pmovmskb %xmm1, %eax ++ incw %ax ++ jnz L(vec_return_begin_16) ++ ++ movdqu -32(%rdi, %rdx), %xmm0 ++ movdqu -32(%rsi, %rdx), %xmm1 ++ CMPEQ %xmm0, %xmm1 ++ pmovmskb %xmm1, %eax ++ incw %ax ++ jnz L(vec_return_end_16) ++ ++ movdqu -16(%rdi, %rdx), %xmm0 ++ movdqu -16(%rsi, %rdx), %xmm1 ++ CMPEQ %xmm0, %xmm1 ++ pmovmskb %xmm1, %eax ++ incw %ax ++ jnz L(vec_return_end) ++ ret + + .p2align 4 + L(128bytesormorein2aligned): +- cmp $512, %rdx +- ja L(512bytesormorein2aligned) + cmp $256, %rdx +- ja L(256bytesormorein2aligned) ++ ja L(aligned_loop) + L(less256bytesin2alinged): +- sub $128, %rdx +- +- movdqa (%rdi), %xmm2 +- pxor (%rsi), %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(16bytesin256) +- +- movdqa 16(%rdi), %xmm2 +- pxor 16(%rsi), %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(32bytesin256) +- +- movdqa 32(%rdi), %xmm2 +- pxor 32(%rsi), %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(48bytesin256) +- +- movdqa 48(%rdi), %xmm2 +- pxor 48(%rsi), %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(64bytesin256) +- +- movdqa 64(%rdi), %xmm2 +- pxor 64(%rsi), %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(80bytesin256) +- +- movdqa 80(%rdi), %xmm2 +- 
pxor 80(%rsi), %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(96bytesin256) +- +- movdqa 96(%rdi), %xmm2 +- pxor 96(%rsi), %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(112bytesin256) +- +- movdqa 112(%rdi), %xmm2 +- pxor 112(%rsi), %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(128bytesin256) +- +- add $128, %rsi +- add $128, %rdi ++ movdqa (%rdi), %xmm1 ++ CMPEQ (%rsi), %xmm1 ++ pmovmskb %xmm1, %eax ++ incw %ax ++ jnz L(vec_return_begin) ++ ++ movdqa 16(%rdi), %xmm1 ++ CMPEQ 16(%rsi), %xmm1 ++ pmovmskb %xmm1, %eax ++ incw %ax ++ jnz L(vec_return_begin_16) ++ ++ movdqa 32(%rdi), %xmm1 ++ CMPEQ 32(%rsi), %xmm1 ++ pmovmskb %xmm1, %eax ++ incw %ax ++ jnz L(vec_return_begin_32) ++ ++ movdqa 48(%rdi), %xmm1 ++ CMPEQ 48(%rsi), %xmm1 ++ pmovmskb %xmm1, %eax ++ incw %ax ++ jnz L(vec_return_begin_48) ++ ++ addq $64, %rdi ++ addq $64, %rsi ++ ++ movdqa (%rdi), %xmm1 ++ CMPEQ (%rsi), %xmm1 ++ pmovmskb %xmm1, %eax ++ incw %ax ++ jnz L(vec_return_begin) ++ ++ movdqa 16(%rdi), %xmm1 ++ CMPEQ 16(%rsi), %xmm1 ++ pmovmskb %xmm1, %eax ++ incw %ax ++ jnz L(vec_return_begin_16) ++ ++ movdqa 32(%rdi), %xmm1 ++ CMPEQ 32(%rsi), %xmm1 ++ pmovmskb %xmm1, %eax ++ incw %ax ++ jnz L(vec_return_begin_32) ++ ++ movdqa 48(%rdi), %xmm1 ++ CMPEQ 48(%rsi), %xmm1 ++ pmovmskb %xmm1, %eax ++ incw %ax ++ jnz L(vec_return_begin_48) ++ ++ addq $-128, %rdx ++ subq $-64, %rsi ++ subq $-64, %rdi + + cmp $64, %rdx +- jae L(less128bytesin2aligned) ++ ja L(less128bytesin2aligned) + + cmp $32, %rdx +- jb L(less32bytesin128in2aligned) +- +- movdqu (%rdi), %xmm2 +- pxor (%rsi), %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(16bytesin256) +- +- movdqu 16(%rdi), %xmm2 +- pxor 16(%rsi), %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(32bytesin256) +- sub $32, %rdx +- add $32, %rdi +- add $32, %rsi +-L(less32bytesin128in2aligned): +- add %rdx, %rsi +- add %rdx, %rdi +- BRANCH_TO_JMPTBL_ENTRY(L(table_64bytes), %rdx, 4) +- +- .p2align 4 +-L(256bytesormorein2aligned): +- +- sub $256, %rdx +- movdqa (%rdi), %xmm2 +- pxor (%rsi), %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(16bytesin256) +- +- movdqa 16(%rdi), %xmm2 +- pxor 16(%rsi), %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(32bytesin256) +- +- movdqa 32(%rdi), %xmm2 +- pxor 32(%rsi), %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(48bytesin256) +- +- movdqa 48(%rdi), %xmm2 +- pxor 48(%rsi), %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(64bytesin256) +- +- movdqa 64(%rdi), %xmm2 +- pxor 64(%rsi), %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(80bytesin256) +- +- movdqa 80(%rdi), %xmm2 +- pxor 80(%rsi), %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(96bytesin256) +- +- movdqa 96(%rdi), %xmm2 +- pxor 96(%rsi), %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(112bytesin256) +- +- movdqa 112(%rdi), %xmm2 +- pxor 112(%rsi), %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(128bytesin256) +- +- movdqa 128(%rdi), %xmm2 +- pxor 128(%rsi), %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(144bytesin256) +- +- movdqa 144(%rdi), %xmm2 +- pxor 144(%rsi), %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(160bytesin256) +- +- movdqa 160(%rdi), %xmm2 +- pxor 160(%rsi), %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(176bytesin256) +- +- movdqa 176(%rdi), %xmm2 +- pxor 176(%rsi), %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(192bytesin256) +- +- movdqa 192(%rdi), %xmm2 +- pxor 192(%rsi), %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(208bytesin256) +- +- movdqa 208(%rdi), %xmm2 +- pxor 208(%rsi), %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(224bytesin256) +- +- movdqa 224(%rdi), %xmm2 +- pxor 224(%rsi), %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(240bytesin256) +- +- movdqa 240(%rdi), %xmm2 +- pxor 240(%rsi), %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(256bytesin256) +- +- add $256, %rsi +- add $256, %rdi +- +- 
cmp $128, %rdx +- jae L(less256bytesin2alinged) +- +- cmp $64, %rdx +- jae L(less128bytesin2aligned) +- +- cmp $32, %rdx +- jb L(less32bytesin256in2alinged) +- +- movdqa (%rdi), %xmm2 +- pxor (%rsi), %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(16bytesin256) +- +- movdqa 16(%rdi), %xmm2 +- pxor 16(%rsi), %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(32bytesin256) +- sub $32, %rdx +- add $32, %rdi +- add $32, %rsi +-L(less32bytesin256in2alinged): +- add %rdx, %rsi +- add %rdx, %rdi +- BRANCH_TO_JMPTBL_ENTRY(L(table_64bytes), %rdx, 4) ++ ja L(aligned_last_64_bytes) ++ ++ movdqu -32(%rdi, %rdx), %xmm0 ++ movdqu -32(%rsi, %rdx), %xmm1 ++ CMPEQ %xmm0, %xmm1 ++ pmovmskb %xmm1, %eax ++ incw %ax ++ jnz L(vec_return_end_16) ++ ++ movdqu -16(%rdi, %rdx), %xmm0 ++ movdqu -16(%rsi, %rdx), %xmm1 ++ CMPEQ %xmm0, %xmm1 ++ pmovmskb %xmm1, %eax ++ incw %ax ++ jnz L(vec_return_end) ++ ret + + .p2align 4 +-L(512bytesormorein2aligned): ++L(aligned_loop): + # ifdef DATA_CACHE_SIZE_HALF + mov $DATA_CACHE_SIZE_HALF, %R8_LP + # else + mov __x86_data_cache_size_half(%rip), %R8_LP + # endif +- mov %r8, %r9 +- shr $1, %r8 +- add %r9, %r8 +- cmp %r8, %rdx +- ja L(L2_L3_cache_aglined) ++ movq %r8, %r9 ++ addq %r8, %r8 ++ addq %r9, %r8 ++ cmpq %r8, %rdx ++ ja L(L2_L3_cache_aligned) + + sub $64, %rdx + .p2align 4 + L(64bytesormore_loopin2aligned): +- movdqa (%rdi), %xmm2 +- pxor (%rsi), %xmm2 +- movdqa %xmm2, %xmm1 +- +- movdqa 16(%rdi), %xmm3 +- pxor 16(%rsi), %xmm3 +- por %xmm3, %xmm1 ++ movdqa (%rdi), %xmm0 ++ movdqa 16(%rdi), %xmm1 ++ movdqa 32(%rdi), %xmm2 ++ movdqa 48(%rdi), %xmm3 + +- movdqa 32(%rdi), %xmm4 +- pxor 32(%rsi), %xmm4 +- por %xmm4, %xmm1 ++ CMPEQ (%rsi), %xmm0 ++ CMPEQ 16(%rsi), %xmm1 ++ CMPEQ 32(%rsi), %xmm2 ++ CMPEQ 48(%rsi), %xmm3 + +- movdqa 48(%rdi), %xmm5 +- pxor 48(%rsi), %xmm5 +- por %xmm5, %xmm1 ++ pand %xmm0, %xmm1 ++ pand %xmm2, %xmm3 ++ pand %xmm1, %xmm3 + +- ptest %xmm1, %xmm0 +- jnc L(64bytesormore_loop_end) ++ pmovmskb %xmm3, %eax ++ incw %ax ++ jnz L(64bytesormore_loop_end) + add $64, %rsi + add $64, %rdi + sub $64, %rdx +- jae L(64bytesormore_loopin2aligned) +- +- add $64, %rdx +- add %rdx, %rsi +- add %rdx, %rdi +- BRANCH_TO_JMPTBL_ENTRY(L(table_64bytes), %rdx, 4) +-L(L2_L3_cache_aglined): +- sub $64, %rdx ++ ja L(64bytesormore_loopin2aligned) ++ jmp L(loop_tail) + ++L(L2_L3_cache_aligned): ++ subq $64, %rdx + .p2align 4 + L(L2_L3_aligned_128bytes_loop): + prefetchnta 0x1c0(%rdi) + prefetchnta 0x1c0(%rsi) +- movdqa (%rdi), %xmm2 +- pxor (%rsi), %xmm2 +- movdqa %xmm2, %xmm1 +- +- movdqa 16(%rdi), %xmm3 +- pxor 16(%rsi), %xmm3 +- por %xmm3, %xmm1 ++ movdqa (%rdi), %xmm0 ++ movdqa 16(%rdi), %xmm1 ++ movdqa 32(%rdi), %xmm2 ++ movdqa 48(%rdi), %xmm3 + +- movdqa 32(%rdi), %xmm4 +- pxor 32(%rsi), %xmm4 +- por %xmm4, %xmm1 ++ CMPEQ (%rsi), %xmm0 ++ CMPEQ 16(%rsi), %xmm1 ++ CMPEQ 32(%rsi), %xmm2 ++ CMPEQ 48(%rsi), %xmm3 + +- movdqa 48(%rdi), %xmm5 +- pxor 48(%rsi), %xmm5 +- por %xmm5, %xmm1 ++ pand %xmm0, %xmm1 ++ pand %xmm2, %xmm3 ++ pand %xmm1, %xmm3 + +- ptest %xmm1, %xmm0 +- jnc L(64bytesormore_loop_end) +- add $64, %rsi +- add $64, %rdi +- sub $64, %rdx +- jae L(L2_L3_aligned_128bytes_loop) +- +- add $64, %rdx +- add %rdx, %rsi +- add %rdx, %rdi +- BRANCH_TO_JMPTBL_ENTRY(L(table_64bytes), %rdx, 4) ++ pmovmskb %xmm3, %eax ++ incw %ax ++ jnz L(64bytesormore_loop_end) + ++ addq $64, %rsi ++ addq $64, %rdi ++ subq $64, %rdx ++ ja L(L2_L3_aligned_128bytes_loop) ++ jmp L(loop_tail) + + .p2align 4 + L(64bytesormore_loop_end): +- add $16, %rdi +- add $16, %rsi +- ptest %xmm2, %xmm0 +- jnc L(16bytes) +- +- add 
$16, %rdi +- add $16, %rsi +- ptest %xmm3, %xmm0 +- jnc L(16bytes) +- +- add $16, %rdi +- add $16, %rsi +- ptest %xmm4, %xmm0 +- jnc L(16bytes) +- +- add $16, %rdi +- add $16, %rsi +- jmp L(16bytes) +- +-L(256bytesin256): +- add $256, %rdi +- add $256, %rsi +- jmp L(16bytes) +-L(240bytesin256): +- add $240, %rdi +- add $240, %rsi +- jmp L(16bytes) +-L(224bytesin256): +- add $224, %rdi +- add $224, %rsi +- jmp L(16bytes) +-L(208bytesin256): +- add $208, %rdi +- add $208, %rsi +- jmp L(16bytes) +-L(192bytesin256): +- add $192, %rdi +- add $192, %rsi +- jmp L(16bytes) +-L(176bytesin256): +- add $176, %rdi +- add $176, %rsi +- jmp L(16bytes) +-L(160bytesin256): +- add $160, %rdi +- add $160, %rsi +- jmp L(16bytes) +-L(144bytesin256): +- add $144, %rdi +- add $144, %rsi +- jmp L(16bytes) +-L(128bytesin256): +- add $128, %rdi +- add $128, %rsi +- jmp L(16bytes) +-L(112bytesin256): +- add $112, %rdi +- add $112, %rsi +- jmp L(16bytes) +-L(96bytesin256): +- add $96, %rdi +- add $96, %rsi +- jmp L(16bytes) +-L(80bytesin256): +- add $80, %rdi +- add $80, %rsi +- jmp L(16bytes) +-L(64bytesin256): +- add $64, %rdi +- add $64, %rsi +- jmp L(16bytes) +-L(48bytesin256): +- add $16, %rdi +- add $16, %rsi +-L(32bytesin256): +- add $16, %rdi +- add $16, %rsi +-L(16bytesin256): +- add $16, %rdi +- add $16, %rsi +-L(16bytes): +- mov -16(%rdi), %rax +- mov -16(%rsi), %rcx +- cmp %rax, %rcx +- jne L(diffin8bytes) +-L(8bytes): +- mov -8(%rdi), %rax +- mov -8(%rsi), %rcx +- cmp %rax, %rcx +- jne L(diffin8bytes) +- xor %eax, %eax +- ret +- +- .p2align 4 +-L(12bytes): +- mov -12(%rdi), %rax +- mov -12(%rsi), %rcx +- cmp %rax, %rcx +- jne L(diffin8bytes) +-L(4bytes): +- mov -4(%rsi), %ecx +-# ifndef USE_AS_WMEMCMP +- mov -4(%rdi), %eax +- cmp %eax, %ecx +-# else +- cmp -4(%rdi), %ecx +-# endif +- jne L(diffin4bytes) +-L(0bytes): +- xor %eax, %eax +- ret +- +-# ifndef USE_AS_WMEMCMP +-/* unreal case for wmemcmp */ +- .p2align 4 +-L(65bytes): +- movdqu -65(%rdi), %xmm1 +- movdqu -65(%rsi), %xmm2 +- mov $-65, %dl +- pxor %xmm1, %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(less16bytes) +-L(49bytes): +- movdqu -49(%rdi), %xmm1 +- movdqu -49(%rsi), %xmm2 +- mov $-49, %dl +- pxor %xmm1, %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(less16bytes) +-L(33bytes): +- movdqu -33(%rdi), %xmm1 +- movdqu -33(%rsi), %xmm2 +- mov $-33, %dl +- pxor %xmm1, %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(less16bytes) +-L(17bytes): +- mov -17(%rdi), %rax +- mov -17(%rsi), %rcx +- cmp %rax, %rcx +- jne L(diffin8bytes) +-L(9bytes): +- mov -9(%rdi), %rax +- mov -9(%rsi), %rcx +- cmp %rax, %rcx +- jne L(diffin8bytes) +- movzbl -1(%rdi), %eax +- movzbl -1(%rsi), %edx +- sub %edx, %eax +- ret +- +- .p2align 4 +-L(13bytes): +- mov -13(%rdi), %rax +- mov -13(%rsi), %rcx +- cmp %rax, %rcx +- jne L(diffin8bytes) +- mov -8(%rdi), %rax +- mov -8(%rsi), %rcx +- cmp %rax, %rcx +- jne L(diffin8bytes) +- xor %eax, %eax +- ret +- +- .p2align 4 +-L(5bytes): +- mov -5(%rdi), %eax +- mov -5(%rsi), %ecx +- cmp %eax, %ecx +- jne L(diffin4bytes) +- movzbl -1(%rdi), %eax +- movzbl -1(%rsi), %edx +- sub %edx, %eax +- ret +- +- .p2align 4 +-L(66bytes): +- movdqu -66(%rdi), %xmm1 +- movdqu -66(%rsi), %xmm2 +- mov $-66, %dl +- pxor %xmm1, %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(less16bytes) +-L(50bytes): +- movdqu -50(%rdi), %xmm1 +- movdqu -50(%rsi), %xmm2 +- mov $-50, %dl +- pxor %xmm1, %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(less16bytes) +-L(34bytes): +- movdqu -34(%rdi), %xmm1 +- movdqu -34(%rsi), %xmm2 +- mov $-34, %dl +- pxor %xmm1, %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(less16bytes) 
+-L(18bytes): +- mov -18(%rdi), %rax +- mov -18(%rsi), %rcx +- cmp %rax, %rcx +- jne L(diffin8bytes) +-L(10bytes): +- mov -10(%rdi), %rax +- mov -10(%rsi), %rcx +- cmp %rax, %rcx +- jne L(diffin8bytes) +- movzwl -2(%rdi), %eax +- movzwl -2(%rsi), %ecx +- cmp %cl, %al +- jne L(end) +- and $0xffff, %eax +- and $0xffff, %ecx +- sub %ecx, %eax +- ret +- +- .p2align 4 +-L(14bytes): +- mov -14(%rdi), %rax +- mov -14(%rsi), %rcx +- cmp %rax, %rcx +- jne L(diffin8bytes) +- mov -8(%rdi), %rax +- mov -8(%rsi), %rcx +- cmp %rax, %rcx +- jne L(diffin8bytes) +- xor %eax, %eax +- ret +- +- .p2align 4 +-L(6bytes): +- mov -6(%rdi), %eax +- mov -6(%rsi), %ecx +- cmp %eax, %ecx +- jne L(diffin4bytes) +-L(2bytes): +- movzwl -2(%rsi), %ecx +- movzwl -2(%rdi), %eax +- cmp %cl, %al +- jne L(end) +- and $0xffff, %eax +- and $0xffff, %ecx +- sub %ecx, %eax +- ret +- +- .p2align 4 +-L(67bytes): +- movdqu -67(%rdi), %xmm2 +- movdqu -67(%rsi), %xmm1 +- mov $-67, %dl +- pxor %xmm1, %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(less16bytes) +-L(51bytes): +- movdqu -51(%rdi), %xmm2 +- movdqu -51(%rsi), %xmm1 +- mov $-51, %dl +- pxor %xmm1, %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(less16bytes) +-L(35bytes): +- movdqu -35(%rsi), %xmm1 +- movdqu -35(%rdi), %xmm2 +- mov $-35, %dl +- pxor %xmm1, %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(less16bytes) +-L(19bytes): +- mov -19(%rdi), %rax +- mov -19(%rsi), %rcx +- cmp %rax, %rcx +- jne L(diffin8bytes) +-L(11bytes): +- mov -11(%rdi), %rax +- mov -11(%rsi), %rcx +- cmp %rax, %rcx +- jne L(diffin8bytes) +- mov -4(%rdi), %eax +- mov -4(%rsi), %ecx +- cmp %eax, %ecx +- jne L(diffin4bytes) +- xor %eax, %eax +- ret +- +- .p2align 4 +-L(15bytes): +- mov -15(%rdi), %rax +- mov -15(%rsi), %rcx +- cmp %rax, %rcx +- jne L(diffin8bytes) +- mov -8(%rdi), %rax +- mov -8(%rsi), %rcx +- cmp %rax, %rcx +- jne L(diffin8bytes) +- xor %eax, %eax +- ret +- +- .p2align 4 +-L(7bytes): +- mov -7(%rdi), %eax +- mov -7(%rsi), %ecx +- cmp %eax, %ecx +- jne L(diffin4bytes) +- mov -4(%rdi), %eax +- mov -4(%rsi), %ecx +- cmp %eax, %ecx +- jne L(diffin4bytes) +- xor %eax, %eax +- ret +- +- .p2align 4 +-L(3bytes): +- movzwl -3(%rdi), %eax +- movzwl -3(%rsi), %ecx +- cmp %eax, %ecx +- jne L(diffin2bytes) +-L(1bytes): +- movzbl -1(%rdi), %eax +- movzbl -1(%rsi), %ecx +- sub %ecx, %eax +- ret +-# endif +- +- .p2align 4 +-L(68bytes): +- movdqu -68(%rdi), %xmm2 +- movdqu -68(%rsi), %xmm1 +- mov $-68, %dl +- pxor %xmm1, %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(less16bytes) +-L(52bytes): +- movdqu -52(%rdi), %xmm2 +- movdqu -52(%rsi), %xmm1 +- mov $-52, %dl +- pxor %xmm1, %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(less16bytes) +-L(36bytes): +- movdqu -36(%rdi), %xmm2 +- movdqu -36(%rsi), %xmm1 +- mov $-36, %dl +- pxor %xmm1, %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(less16bytes) +-L(20bytes): +- movdqu -20(%rdi), %xmm2 +- movdqu -20(%rsi), %xmm1 +- mov $-20, %dl +- pxor %xmm1, %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(less16bytes) +- mov -4(%rsi), %ecx +- +-# ifndef USE_AS_WMEMCMP +- mov -4(%rdi), %eax +- cmp %eax, %ecx +-# else +- cmp -4(%rdi), %ecx +-# endif +- jne L(diffin4bytes) +- xor %eax, %eax +- ret +- +-# ifndef USE_AS_WMEMCMP +-/* unreal cases for wmemcmp */ +- .p2align 4 +-L(69bytes): +- movdqu -69(%rsi), %xmm1 +- movdqu -69(%rdi), %xmm2 +- mov $-69, %dl +- pxor %xmm1, %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(less16bytes) +-L(53bytes): +- movdqu -53(%rsi), %xmm1 +- movdqu -53(%rdi), %xmm2 +- mov $-53, %dl +- pxor %xmm1, %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(less16bytes) +-L(37bytes): +- movdqu -37(%rsi), %xmm1 +- movdqu -37(%rdi), %xmm2 +- mov 
$-37, %dl +- pxor %xmm1, %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(less16bytes) +-L(21bytes): +- movdqu -21(%rsi), %xmm1 +- movdqu -21(%rdi), %xmm2 +- mov $-21, %dl +- pxor %xmm1, %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(less16bytes) +- mov -8(%rdi), %rax +- mov -8(%rsi), %rcx +- cmp %rax, %rcx +- jne L(diffin8bytes) +- xor %eax, %eax +- ret +- +- .p2align 4 +-L(70bytes): +- movdqu -70(%rsi), %xmm1 +- movdqu -70(%rdi), %xmm2 +- mov $-70, %dl +- pxor %xmm1, %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(less16bytes) +-L(54bytes): +- movdqu -54(%rsi), %xmm1 +- movdqu -54(%rdi), %xmm2 +- mov $-54, %dl +- pxor %xmm1, %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(less16bytes) +-L(38bytes): +- movdqu -38(%rsi), %xmm1 +- movdqu -38(%rdi), %xmm2 +- mov $-38, %dl +- pxor %xmm1, %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(less16bytes) +-L(22bytes): +- movdqu -22(%rsi), %xmm1 +- movdqu -22(%rdi), %xmm2 +- mov $-22, %dl +- pxor %xmm1, %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(less16bytes) +- mov -8(%rdi), %rax +- mov -8(%rsi), %rcx +- cmp %rax, %rcx +- jne L(diffin8bytes) +- xor %eax, %eax +- ret +- +- .p2align 4 +-L(71bytes): +- movdqu -71(%rsi), %xmm1 +- movdqu -71(%rdi), %xmm2 +- mov $-71, %dl +- pxor %xmm1, %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(less16bytes) +-L(55bytes): +- movdqu -55(%rdi), %xmm2 +- movdqu -55(%rsi), %xmm1 +- mov $-55, %dl +- pxor %xmm1, %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(less16bytes) +-L(39bytes): +- movdqu -39(%rdi), %xmm2 +- movdqu -39(%rsi), %xmm1 +- mov $-39, %dl +- pxor %xmm1, %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(less16bytes) +-L(23bytes): +- movdqu -23(%rdi), %xmm2 +- movdqu -23(%rsi), %xmm1 +- mov $-23, %dl +- pxor %xmm1, %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(less16bytes) +- mov -8(%rdi), %rax +- mov -8(%rsi), %rcx +- cmp %rax, %rcx +- jne L(diffin8bytes) +- xor %eax, %eax +- ret +-# endif +- +- .p2align 4 +-L(72bytes): +- movdqu -72(%rsi), %xmm1 +- movdqu -72(%rdi), %xmm2 +- mov $-72, %dl +- pxor %xmm1, %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(less16bytes) +-L(56bytes): +- movdqu -56(%rdi), %xmm2 +- movdqu -56(%rsi), %xmm1 +- mov $-56, %dl +- pxor %xmm1, %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(less16bytes) +-L(40bytes): +- movdqu -40(%rdi), %xmm2 +- movdqu -40(%rsi), %xmm1 +- mov $-40, %dl +- pxor %xmm1, %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(less16bytes) +-L(24bytes): +- movdqu -24(%rdi), %xmm2 +- movdqu -24(%rsi), %xmm1 +- mov $-24, %dl +- pxor %xmm1, %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(less16bytes) +- +- mov -8(%rsi), %rcx +- mov -8(%rdi), %rax +- cmp %rax, %rcx +- jne L(diffin8bytes) +- xor %eax, %eax +- ret +- +-# ifndef USE_AS_WMEMCMP +-/* unreal cases for wmemcmp */ +- .p2align 4 +-L(73bytes): +- movdqu -73(%rsi), %xmm1 +- movdqu -73(%rdi), %xmm2 +- mov $-73, %dl +- pxor %xmm1, %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(less16bytes) +-L(57bytes): +- movdqu -57(%rdi), %xmm2 +- movdqu -57(%rsi), %xmm1 +- mov $-57, %dl +- pxor %xmm1, %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(less16bytes) +-L(41bytes): +- movdqu -41(%rdi), %xmm2 +- movdqu -41(%rsi), %xmm1 +- mov $-41, %dl +- pxor %xmm1, %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(less16bytes) +-L(25bytes): +- movdqu -25(%rdi), %xmm2 +- movdqu -25(%rsi), %xmm1 +- mov $-25, %dl +- pxor %xmm1, %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(less16bytes) +- mov -9(%rdi), %rax +- mov -9(%rsi), %rcx +- cmp %rax, %rcx +- jne L(diffin8bytes) +- movzbl -1(%rdi), %eax +- movzbl -1(%rsi), %ecx +- sub %ecx, %eax +- ret +- +- .p2align 4 +-L(74bytes): +- movdqu -74(%rsi), %xmm1 +- movdqu -74(%rdi), %xmm2 +- mov $-74, %dl +- pxor %xmm1, %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(less16bytes) +-L(58bytes): +- 
movdqu -58(%rdi), %xmm2 +- movdqu -58(%rsi), %xmm1 +- mov $-58, %dl +- pxor %xmm1, %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(less16bytes) +-L(42bytes): +- movdqu -42(%rdi), %xmm2 +- movdqu -42(%rsi), %xmm1 +- mov $-42, %dl +- pxor %xmm1, %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(less16bytes) +-L(26bytes): +- movdqu -26(%rdi), %xmm2 +- movdqu -26(%rsi), %xmm1 +- mov $-26, %dl +- pxor %xmm1, %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(less16bytes) +- mov -10(%rdi), %rax +- mov -10(%rsi), %rcx +- cmp %rax, %rcx +- jne L(diffin8bytes) +- movzwl -2(%rdi), %eax +- movzwl -2(%rsi), %ecx +- jmp L(diffin2bytes) +- +- .p2align 4 +-L(75bytes): +- movdqu -75(%rsi), %xmm1 +- movdqu -75(%rdi), %xmm2 +- mov $-75, %dl +- pxor %xmm1, %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(less16bytes) +-L(59bytes): +- movdqu -59(%rdi), %xmm2 +- movdqu -59(%rsi), %xmm1 +- mov $-59, %dl +- pxor %xmm1, %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(less16bytes) +-L(43bytes): +- movdqu -43(%rdi), %xmm2 +- movdqu -43(%rsi), %xmm1 +- mov $-43, %dl +- pxor %xmm1, %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(less16bytes) +-L(27bytes): +- movdqu -27(%rdi), %xmm2 +- movdqu -27(%rsi), %xmm1 +- mov $-27, %dl +- pxor %xmm1, %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(less16bytes) +- mov -11(%rdi), %rax +- mov -11(%rsi), %rcx +- cmp %rax, %rcx +- jne L(diffin8bytes) +- mov -4(%rdi), %eax +- mov -4(%rsi), %ecx +- cmp %eax, %ecx +- jne L(diffin4bytes) +- xor %eax, %eax +- ret +-# endif +- .p2align 4 +-L(76bytes): +- movdqu -76(%rsi), %xmm1 +- movdqu -76(%rdi), %xmm2 +- mov $-76, %dl +- pxor %xmm1, %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(less16bytes) +-L(60bytes): +- movdqu -60(%rdi), %xmm2 +- movdqu -60(%rsi), %xmm1 +- mov $-60, %dl +- pxor %xmm1, %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(less16bytes) +-L(44bytes): +- movdqu -44(%rdi), %xmm2 +- movdqu -44(%rsi), %xmm1 +- mov $-44, %dl +- pxor %xmm1, %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(less16bytes) +-L(28bytes): +- movdqu -28(%rdi), %xmm2 +- movdqu -28(%rsi), %xmm1 +- mov $-28, %dl +- pxor %xmm1, %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(less16bytes) +- mov -12(%rdi), %rax +- mov -12(%rsi), %rcx +- cmp %rax, %rcx +- jne L(diffin8bytes) +- mov -4(%rsi), %ecx +-# ifndef USE_AS_WMEMCMP +- mov -4(%rdi), %eax +- cmp %eax, %ecx +-# else +- cmp -4(%rdi), %ecx +-# endif +- jne L(diffin4bytes) +- xor %eax, %eax +- ret +- +-# ifndef USE_AS_WMEMCMP +-/* unreal cases for wmemcmp */ +- .p2align 4 +-L(77bytes): +- movdqu -77(%rsi), %xmm1 +- movdqu -77(%rdi), %xmm2 +- mov $-77, %dl +- pxor %xmm1, %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(less16bytes) +-L(61bytes): +- movdqu -61(%rdi), %xmm2 +- movdqu -61(%rsi), %xmm1 +- mov $-61, %dl +- pxor %xmm1, %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(less16bytes) +-L(45bytes): +- movdqu -45(%rdi), %xmm2 +- movdqu -45(%rsi), %xmm1 +- mov $-45, %dl +- pxor %xmm1, %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(less16bytes) +-L(29bytes): +- movdqu -29(%rdi), %xmm2 +- movdqu -29(%rsi), %xmm1 +- mov $-29, %dl +- pxor %xmm1, %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(less16bytes) +- +- mov -13(%rdi), %rax +- mov -13(%rsi), %rcx +- cmp %rax, %rcx +- jne L(diffin8bytes) +- +- mov -8(%rdi), %rax +- mov -8(%rsi), %rcx +- cmp %rax, %rcx +- jne L(diffin8bytes) +- xor %eax, %eax +- ret +- +- .p2align 4 +-L(78bytes): +- movdqu -78(%rsi), %xmm1 +- movdqu -78(%rdi), %xmm2 +- mov $-78, %dl +- pxor %xmm1, %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(less16bytes) +-L(62bytes): +- movdqu -62(%rdi), %xmm2 +- movdqu -62(%rsi), %xmm1 +- mov $-62, %dl +- pxor %xmm1, %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(less16bytes) +-L(46bytes): +- movdqu -46(%rdi), %xmm2 +- movdqu -46(%rsi), 
%xmm1 +- mov $-46, %dl +- pxor %xmm1, %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(less16bytes) +-L(30bytes): +- movdqu -30(%rdi), %xmm2 +- movdqu -30(%rsi), %xmm1 +- mov $-30, %dl +- pxor %xmm1, %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(less16bytes) +- mov -14(%rdi), %rax +- mov -14(%rsi), %rcx +- cmp %rax, %rcx +- jne L(diffin8bytes) +- mov -8(%rdi), %rax +- mov -8(%rsi), %rcx +- cmp %rax, %rcx +- jne L(diffin8bytes) +- xor %eax, %eax +- ret +- +- .p2align 4 +-L(79bytes): +- movdqu -79(%rsi), %xmm1 +- movdqu -79(%rdi), %xmm2 +- mov $-79, %dl +- pxor %xmm1, %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(less16bytes) +-L(63bytes): +- movdqu -63(%rdi), %xmm2 +- movdqu -63(%rsi), %xmm1 +- mov $-63, %dl +- pxor %xmm1, %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(less16bytes) +-L(47bytes): +- movdqu -47(%rdi), %xmm2 +- movdqu -47(%rsi), %xmm1 +- mov $-47, %dl +- pxor %xmm1, %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(less16bytes) +-L(31bytes): +- movdqu -31(%rdi), %xmm2 +- movdqu -31(%rsi), %xmm1 +- mov $-31, %dl +- pxor %xmm1, %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(less16bytes) +- mov -15(%rdi), %rax +- mov -15(%rsi), %rcx +- cmp %rax, %rcx +- jne L(diffin8bytes) +- mov -8(%rdi), %rax +- mov -8(%rsi), %rcx +- cmp %rax, %rcx +- jne L(diffin8bytes) +- xor %eax, %eax +- ret +-# endif +- .p2align 4 +-L(64bytes): +- movdqu -64(%rdi), %xmm2 +- movdqu -64(%rsi), %xmm1 +- mov $-64, %dl +- pxor %xmm1, %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(less16bytes) +-L(48bytes): +- movdqu -48(%rdi), %xmm2 +- movdqu -48(%rsi), %xmm1 +- mov $-48, %dl +- pxor %xmm1, %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(less16bytes) +-L(32bytes): +- movdqu -32(%rdi), %xmm2 +- movdqu -32(%rsi), %xmm1 +- mov $-32, %dl +- pxor %xmm1, %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(less16bytes) +- +- mov -16(%rdi), %rax +- mov -16(%rsi), %rcx +- cmp %rax, %rcx +- jne L(diffin8bytes) +- +- mov -8(%rdi), %rax +- mov -8(%rsi), %rcx +- cmp %rax, %rcx +- jne L(diffin8bytes) +- xor %eax, %eax +- ret +- +-/* +- * Aligned 8 bytes to avoid 2 branch "taken" in one 16 alinged code block. 
+- */ +- .p2align 3 +-L(less16bytes): +- movsbq %dl, %rdx +- mov (%rsi, %rdx), %rcx +- mov (%rdi, %rdx), %rax +- cmp %rax, %rcx +- jne L(diffin8bytes) +- mov 8(%rsi, %rdx), %rcx +- mov 8(%rdi, %rdx), %rax +-L(diffin8bytes): +- cmp %eax, %ecx +- jne L(diffin4bytes) +- shr $32, %rcx +- shr $32, %rax +- ++ pmovmskb %xmm0, %ecx ++ incw %cx ++ jnz L(loop_end_ret) ++ ++ pmovmskb %xmm1, %ecx ++ notw %cx ++ sall $16, %ecx ++ jnz L(loop_end_ret) ++ ++ pmovmskb %xmm2, %ecx ++ notw %cx ++ shlq $32, %rcx ++ jnz L(loop_end_ret) ++ ++ addq $48, %rdi ++ addq $48, %rsi ++ movq %rax, %rcx ++ ++ .p2align 4,, 6 ++L(loop_end_ret): ++ bsfq %rcx, %rcx + # ifdef USE_AS_WMEMCMP +-/* for wmemcmp */ +- cmp %eax, %ecx +- jne L(diffin4bytes) +- xor %eax, %eax +- ret +-# endif +- +-L(diffin4bytes): +-# ifndef USE_AS_WMEMCMP +- cmp %cx, %ax +- jne L(diffin2bytes) +- shr $16, %ecx +- shr $16, %eax +-L(diffin2bytes): +- cmp %cl, %al +- jne L(end) +- and $0xffff, %eax +- and $0xffff, %ecx +- sub %ecx, %eax +- ret +- +- .p2align 4 +-L(end): +- and $0xff, %eax +- and $0xff, %ecx +- sub %ecx, %eax +- ret ++ movl (%rdi, %rcx), %eax ++ xorl %edx, %edx ++ cmpl (%rsi, %rcx), %eax ++ setg %dl ++ leal -1(%rdx, %rdx), %eax + # else +- +-/* for wmemcmp */ +- mov $1, %eax +- jl L(nequal_bigger) +- neg %eax +- ret +- +- .p2align 4 +-L(nequal_bigger): +- ret +- +-L(unreal_case): +- xor %eax, %eax +- ret ++ movzbl (%rdi, %rcx), %eax ++ movzbl (%rsi, %rcx), %ecx ++ subl %ecx, %eax + # endif +- ++ ret + END (MEMCMP) +- +- .section .rodata.sse4.1,"a",@progbits +- .p2align 3 +-# ifndef USE_AS_WMEMCMP +-L(table_64bytes): +- .int JMPTBL (L(0bytes), L(table_64bytes)) +- .int JMPTBL (L(1bytes), L(table_64bytes)) +- .int JMPTBL (L(2bytes), L(table_64bytes)) +- .int JMPTBL (L(3bytes), L(table_64bytes)) +- .int JMPTBL (L(4bytes), L(table_64bytes)) +- .int JMPTBL (L(5bytes), L(table_64bytes)) +- .int JMPTBL (L(6bytes), L(table_64bytes)) +- .int JMPTBL (L(7bytes), L(table_64bytes)) +- .int JMPTBL (L(8bytes), L(table_64bytes)) +- .int JMPTBL (L(9bytes), L(table_64bytes)) +- .int JMPTBL (L(10bytes), L(table_64bytes)) +- .int JMPTBL (L(11bytes), L(table_64bytes)) +- .int JMPTBL (L(12bytes), L(table_64bytes)) +- .int JMPTBL (L(13bytes), L(table_64bytes)) +- .int JMPTBL (L(14bytes), L(table_64bytes)) +- .int JMPTBL (L(15bytes), L(table_64bytes)) +- .int JMPTBL (L(16bytes), L(table_64bytes)) +- .int JMPTBL (L(17bytes), L(table_64bytes)) +- .int JMPTBL (L(18bytes), L(table_64bytes)) +- .int JMPTBL (L(19bytes), L(table_64bytes)) +- .int JMPTBL (L(20bytes), L(table_64bytes)) +- .int JMPTBL (L(21bytes), L(table_64bytes)) +- .int JMPTBL (L(22bytes), L(table_64bytes)) +- .int JMPTBL (L(23bytes), L(table_64bytes)) +- .int JMPTBL (L(24bytes), L(table_64bytes)) +- .int JMPTBL (L(25bytes), L(table_64bytes)) +- .int JMPTBL (L(26bytes), L(table_64bytes)) +- .int JMPTBL (L(27bytes), L(table_64bytes)) +- .int JMPTBL (L(28bytes), L(table_64bytes)) +- .int JMPTBL (L(29bytes), L(table_64bytes)) +- .int JMPTBL (L(30bytes), L(table_64bytes)) +- .int JMPTBL (L(31bytes), L(table_64bytes)) +- .int JMPTBL (L(32bytes), L(table_64bytes)) +- .int JMPTBL (L(33bytes), L(table_64bytes)) +- .int JMPTBL (L(34bytes), L(table_64bytes)) +- .int JMPTBL (L(35bytes), L(table_64bytes)) +- .int JMPTBL (L(36bytes), L(table_64bytes)) +- .int JMPTBL (L(37bytes), L(table_64bytes)) +- .int JMPTBL (L(38bytes), L(table_64bytes)) +- .int JMPTBL (L(39bytes), L(table_64bytes)) +- .int JMPTBL (L(40bytes), L(table_64bytes)) +- .int JMPTBL (L(41bytes), L(table_64bytes)) +- .int JMPTBL (L(42bytes), 
L(table_64bytes)) +- .int JMPTBL (L(43bytes), L(table_64bytes)) +- .int JMPTBL (L(44bytes), L(table_64bytes)) +- .int JMPTBL (L(45bytes), L(table_64bytes)) +- .int JMPTBL (L(46bytes), L(table_64bytes)) +- .int JMPTBL (L(47bytes), L(table_64bytes)) +- .int JMPTBL (L(48bytes), L(table_64bytes)) +- .int JMPTBL (L(49bytes), L(table_64bytes)) +- .int JMPTBL (L(50bytes), L(table_64bytes)) +- .int JMPTBL (L(51bytes), L(table_64bytes)) +- .int JMPTBL (L(52bytes), L(table_64bytes)) +- .int JMPTBL (L(53bytes), L(table_64bytes)) +- .int JMPTBL (L(54bytes), L(table_64bytes)) +- .int JMPTBL (L(55bytes), L(table_64bytes)) +- .int JMPTBL (L(56bytes), L(table_64bytes)) +- .int JMPTBL (L(57bytes), L(table_64bytes)) +- .int JMPTBL (L(58bytes), L(table_64bytes)) +- .int JMPTBL (L(59bytes), L(table_64bytes)) +- .int JMPTBL (L(60bytes), L(table_64bytes)) +- .int JMPTBL (L(61bytes), L(table_64bytes)) +- .int JMPTBL (L(62bytes), L(table_64bytes)) +- .int JMPTBL (L(63bytes), L(table_64bytes)) +- .int JMPTBL (L(64bytes), L(table_64bytes)) +- .int JMPTBL (L(65bytes), L(table_64bytes)) +- .int JMPTBL (L(66bytes), L(table_64bytes)) +- .int JMPTBL (L(67bytes), L(table_64bytes)) +- .int JMPTBL (L(68bytes), L(table_64bytes)) +- .int JMPTBL (L(69bytes), L(table_64bytes)) +- .int JMPTBL (L(70bytes), L(table_64bytes)) +- .int JMPTBL (L(71bytes), L(table_64bytes)) +- .int JMPTBL (L(72bytes), L(table_64bytes)) +- .int JMPTBL (L(73bytes), L(table_64bytes)) +- .int JMPTBL (L(74bytes), L(table_64bytes)) +- .int JMPTBL (L(75bytes), L(table_64bytes)) +- .int JMPTBL (L(76bytes), L(table_64bytes)) +- .int JMPTBL (L(77bytes), L(table_64bytes)) +- .int JMPTBL (L(78bytes), L(table_64bytes)) +- .int JMPTBL (L(79bytes), L(table_64bytes)) +-# else +-L(table_64bytes): +- .int JMPTBL (L(0bytes), L(table_64bytes)) +- .int JMPTBL (L(unreal_case), L(table_64bytes)) +- .int JMPTBL (L(unreal_case), L(table_64bytes)) +- .int JMPTBL (L(unreal_case), L(table_64bytes)) +- .int JMPTBL (L(4bytes), L(table_64bytes)) +- .int JMPTBL (L(unreal_case), L(table_64bytes)) +- .int JMPTBL (L(unreal_case), L(table_64bytes)) +- .int JMPTBL (L(unreal_case), L(table_64bytes)) +- .int JMPTBL (L(8bytes), L(table_64bytes)) +- .int JMPTBL (L(unreal_case), L(table_64bytes)) +- .int JMPTBL (L(unreal_case), L(table_64bytes)) +- .int JMPTBL (L(unreal_case), L(table_64bytes)) +- .int JMPTBL (L(12bytes), L(table_64bytes)) +- .int JMPTBL (L(unreal_case), L(table_64bytes)) +- .int JMPTBL (L(unreal_case), L(table_64bytes)) +- .int JMPTBL (L(unreal_case), L(table_64bytes)) +- .int JMPTBL (L(16bytes), L(table_64bytes)) +- .int JMPTBL (L(unreal_case), L(table_64bytes)) +- .int JMPTBL (L(unreal_case), L(table_64bytes)) +- .int JMPTBL (L(unreal_case), L(table_64bytes)) +- .int JMPTBL (L(20bytes), L(table_64bytes)) +- .int JMPTBL (L(unreal_case), L(table_64bytes)) +- .int JMPTBL (L(unreal_case), L(table_64bytes)) +- .int JMPTBL (L(unreal_case), L(table_64bytes)) +- .int JMPTBL (L(24bytes), L(table_64bytes)) +- .int JMPTBL (L(unreal_case), L(table_64bytes)) +- .int JMPTBL (L(unreal_case), L(table_64bytes)) +- .int JMPTBL (L(unreal_case), L(table_64bytes)) +- .int JMPTBL (L(28bytes), L(table_64bytes)) +- .int JMPTBL (L(unreal_case), L(table_64bytes)) +- .int JMPTBL (L(unreal_case), L(table_64bytes)) +- .int JMPTBL (L(unreal_case), L(table_64bytes)) +- .int JMPTBL (L(32bytes), L(table_64bytes)) +- .int JMPTBL (L(unreal_case), L(table_64bytes)) +- .int JMPTBL (L(unreal_case), L(table_64bytes)) +- .int JMPTBL (L(unreal_case), L(table_64bytes)) +- .int JMPTBL (L(36bytes), 
L(table_64bytes)) +- .int JMPTBL (L(unreal_case), L(table_64bytes)) +- .int JMPTBL (L(unreal_case), L(table_64bytes)) +- .int JMPTBL (L(unreal_case), L(table_64bytes)) +- .int JMPTBL (L(40bytes), L(table_64bytes)) +- .int JMPTBL (L(unreal_case), L(table_64bytes)) +- .int JMPTBL (L(unreal_case), L(table_64bytes)) +- .int JMPTBL (L(unreal_case), L(table_64bytes)) +- .int JMPTBL (L(44bytes), L(table_64bytes)) +- .int JMPTBL (L(unreal_case), L(table_64bytes)) +- .int JMPTBL (L(unreal_case), L(table_64bytes)) +- .int JMPTBL (L(unreal_case), L(table_64bytes)) +- .int JMPTBL (L(48bytes), L(table_64bytes)) +- .int JMPTBL (L(unreal_case), L(table_64bytes)) +- .int JMPTBL (L(unreal_case), L(table_64bytes)) +- .int JMPTBL (L(unreal_case), L(table_64bytes)) +- .int JMPTBL (L(52bytes), L(table_64bytes)) +- .int JMPTBL (L(unreal_case), L(table_64bytes)) +- .int JMPTBL (L(unreal_case), L(table_64bytes)) +- .int JMPTBL (L(unreal_case), L(table_64bytes)) +- .int JMPTBL (L(56bytes), L(table_64bytes)) +- .int JMPTBL (L(unreal_case), L(table_64bytes)) +- .int JMPTBL (L(unreal_case), L(table_64bytes)) +- .int JMPTBL (L(unreal_case), L(table_64bytes)) +- .int JMPTBL (L(60bytes), L(table_64bytes)) +- .int JMPTBL (L(unreal_case), L(table_64bytes)) +- .int JMPTBL (L(unreal_case), L(table_64bytes)) +- .int JMPTBL (L(unreal_case), L(table_64bytes)) +- .int JMPTBL (L(64bytes), L(table_64bytes)) +- .int JMPTBL (L(unreal_case), L(table_64bytes)) +- .int JMPTBL (L(unreal_case), L(table_64bytes)) +- .int JMPTBL (L(unreal_case), L(table_64bytes)) +- .int JMPTBL (L(68bytes), L(table_64bytes)) +- .int JMPTBL (L(unreal_case), L(table_64bytes)) +- .int JMPTBL (L(unreal_case), L(table_64bytes)) +- .int JMPTBL (L(unreal_case), L(table_64bytes)) +- .int JMPTBL (L(72bytes), L(table_64bytes)) +- .int JMPTBL (L(unreal_case), L(table_64bytes)) +- .int JMPTBL (L(unreal_case), L(table_64bytes)) +- .int JMPTBL (L(unreal_case), L(table_64bytes)) +- .int JMPTBL (L(76bytes), L(table_64bytes)) +- .int JMPTBL (L(unreal_case), L(table_64bytes)) +- .int JMPTBL (L(unreal_case), L(table_64bytes)) +- .int JMPTBL (L(unreal_case), L(table_64bytes)) +-# endif + #endif diff --git a/SOURCES/glibc-upstream-2.34-184.patch b/SOURCES/glibc-upstream-2.34-184.patch new file mode 100644 index 0000000..805f91e --- /dev/null +++ b/SOURCES/glibc-upstream-2.34-184.patch @@ -0,0 +1,104 @@ +commit 4bbd0f866ad0ff197f72346f776ebee9b7e1a706 +Author: Noah Goldstein +Date: Fri Dec 3 15:29:25 2021 -0800 + + x86-64: Use notl in EVEX strcmp [BZ #28646] + + Must use notl %edi here as lower bits are for CHAR comparisons + potentially out of range thus can be 0 without indicating mismatch. + This fixes BZ #28646. + + Co-Authored-By: H.J. 
Lu + (cherry picked from commit 4df1fa6ddc8925a75f3da644d5da3bb16eb33f02) + +diff --git a/string/test-strcmp.c b/string/test-strcmp.c +index 7feababf4ddc5603..a0255b9625fbcedd 100644 +--- a/string/test-strcmp.c ++++ b/string/test-strcmp.c +@@ -25,6 +25,7 @@ + # define TEST_NAME "strcmp" + #endif + #include "test-string.h" ++#include + + #ifdef WIDE + # include +@@ -392,6 +393,32 @@ check2 (void) + } + } + ++static void ++check3 (void) ++{ ++ size_t size = 0xd000 + 0x4000; ++ CHAR *s1, *s2; ++ CHAR *buffer1 = mmap (NULL, size, PROT_READ | PROT_WRITE, ++ MAP_PRIVATE | MAP_ANON, -1, 0); ++ CHAR *buffer2 = mmap (NULL, size, PROT_READ | PROT_WRITE, ++ MAP_PRIVATE | MAP_ANON, -1, 0); ++ if (buffer1 == MAP_FAILED || buffer1 == MAP_FAILED) ++ error (EXIT_UNSUPPORTED, errno, "mmap failed"); ++ ++ s1 = (CHAR *) (buffer1 + 0x8f8 / sizeof (CHAR)); ++ s2 = (CHAR *) (buffer2 + 0xcff3 / sizeof (CHAR)); ++ ++ STRCPY(s1, L("/export/redhat/rpms/BUILD/java-1.8.0-openjdk-1.8.0.312.b07-2.fc35.x86_64/openjdk/langtools/src/share/classes/com/sun/tools/doclets/internal/toolkit/util/PathDocFileFactory.java")); ++ STRCPY(s2, L("/export/redhat/rpms/BUILD/java-1.8.0-openjdk-1.8.0.312.b07-2.fc35.x86_64/openjdk/langtools/src/share/classes/com/sun/tools/doclets/internal/toolkit/taglets/ThrowsTaglet.java")); ++ ++ int exp_result = SIMPLE_STRCMP (s1, s2); ++ FOR_EACH_IMPL (impl, 0) ++ check_result (impl, s1, s2, exp_result); ++ ++ munmap ((void *) buffer1, size); ++ munmap ((void *) buffer2, size); ++} ++ + int + test_main (void) + { +@@ -400,6 +427,7 @@ test_main (void) + test_init (); + check(); + check2 (); ++ check3 (); + + printf ("%23s", ""); + FOR_EACH_IMPL (impl, 0) +diff --git a/sysdeps/x86_64/multiarch/strcmp-evex.S b/sysdeps/x86_64/multiarch/strcmp-evex.S +index 82f12ac89bcae20b..6f5c4bf984da2b80 100644 +--- a/sysdeps/x86_64/multiarch/strcmp-evex.S ++++ b/sysdeps/x86_64/multiarch/strcmp-evex.S +@@ -656,12 +656,13 @@ L(loop_cross_page): + in YMM3 and 32 bytes at VEC_SIZE(%rdx, %r10). */ + VPCMP $0, VEC_SIZE(%rdx, %r10), %YMM3, %k3{%k4} + kmovd %k3, %edi ++ /* Must use notl %edi here as lower bits are for CHAR ++ comparisons potentially out of range thus can be 0 without ++ indicating mismatch. */ ++ notl %edi + # ifdef USE_AS_WCSCMP + /* Don't use subl since it is the upper 8 bits of EDI below. */ +- notl %edi + andl $0xff, %edi +-# else +- incl %edi + # endif + + # ifdef USE_AS_WCSCMP +@@ -743,12 +744,13 @@ L(loop_cross_page_2_vec): + in YMM1 and 32 bytes at (VEC_SIZE * 3)(%rdx, %r10). */ + VPCMP $0, (VEC_SIZE * 3)(%rdx, %r10), %YMM1, %k3{%k4} + kmovd %k3, %edi ++ /* Must use notl %edi here as lower bits are for CHAR ++ comparisons potentially out of range thus can be 0 without ++ indicating mismatch. */ ++ notl %edi + # ifdef USE_AS_WCSCMP + /* Don't use subl since it is the upper 8 bits of EDI below. */ +- notl %edi + andl $0xff, %edi +-# else +- incl %edi + # endif + + # ifdef USE_AS_WCSCMP diff --git a/SOURCES/glibc-upstream-2.34-185.patch b/SOURCES/glibc-upstream-2.34-185.patch new file mode 100644 index 0000000..f06f86f --- /dev/null +++ b/SOURCES/glibc-upstream-2.34-185.patch @@ -0,0 +1,30 @@ +commit f3a99b2216114f89b20329ae7664b764248b4bbd +Author: H.J. Lu +Date: Mon Dec 6 07:14:12 2021 -0800 + + x86: Don't set Prefer_No_AVX512 for processors with AVX512 and AVX-VNNI + + Don't set Prefer_No_AVX512 on processors with AVX512 and AVX-VNNI since + they won't lower CPU frequency when ZMM load and store instructions are + used. 
+ + (cherry picked from commit ceeffe968c01b1202e482f4855cb6baf5c6cb713) + +diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c +index f4d4049e391cbabd..09590d8794b1c6fb 100644 +--- a/sysdeps/x86/cpu-features.c ++++ b/sysdeps/x86/cpu-features.c +@@ -566,8 +566,11 @@ disable_tsx: + |= bit_arch_Prefer_No_VZEROUPPER; + else + { +- cpu_features->preferred[index_arch_Prefer_No_AVX512] +- |= bit_arch_Prefer_No_AVX512; ++ /* Processors with AVX512 and AVX-VNNI won't lower CPU frequency ++ when ZMM load and store instructions are used. */ ++ if (!CPU_FEATURES_CPU_P (cpu_features, AVX_VNNI)) ++ cpu_features->preferred[index_arch_Prefer_No_AVX512] ++ |= bit_arch_Prefer_No_AVX512; + + /* Avoid RTM abort triggered by VZEROUPPER inside a + transactionally executing RTM region. */ diff --git a/SOURCES/glibc-upstream-2.34-186.patch b/SOURCES/glibc-upstream-2.34-186.patch new file mode 100644 index 0000000..a046844 --- /dev/null +++ b/SOURCES/glibc-upstream-2.34-186.patch @@ -0,0 +1,384 @@ +commit c796418d00f65c8c5fbed477f3ba6da2bee64ece +Author: Noah Goldstein +Date: Fri Dec 24 18:54:41 2021 -0600 + + x86: Optimize L(less_vec) case in memcmp-evex-movbe.S + + No bug. + Optimizations are twofold. + + 1) Replace page cross and 0/1 checks with masked load instructions in + L(less_vec). In applications this reduces branch-misses in the + hot [0, 32] case. + 2) Change controlflow so that L(less_vec) case gets the fall through. + + Change 2) helps copies in the [0, 32] size range but comes at the cost + of copies in the [33, 64] size range. From profiles of GCC and + Python3, 94%+ and 99%+ of calls are in the [0, 32] range so this + appears to the the right tradeoff. + + Signed-off-by: Noah Goldstein + Reviewed-by: H.J. Lu + (cherry picked from commit abddd61de090ae84e380aff68a98bd94ef704667) + +diff --git a/sysdeps/x86_64/multiarch/memcmp-evex-movbe.S b/sysdeps/x86_64/multiarch/memcmp-evex-movbe.S +index 640f6757fac8a356..d2899e7c7078cd41 100644 +--- a/sysdeps/x86_64/multiarch/memcmp-evex-movbe.S ++++ b/sysdeps/x86_64/multiarch/memcmp-evex-movbe.S +@@ -62,15 +62,18 @@ Latency: + # define VMOVU vmovdqu64 + + # ifdef USE_AS_WMEMCMP ++# define VMOVU_MASK vmovdqu32 + # define CHAR_SIZE 4 + # define VPCMP vpcmpd + # define VPTEST vptestmd + # else ++# define VMOVU_MASK vmovdqu8 + # define CHAR_SIZE 1 + # define VPCMP vpcmpub + # define VPTEST vptestmb + # endif + ++ + # define VEC_SIZE 32 + # define PAGE_SIZE 4096 + # define CHAR_PER_VEC (VEC_SIZE / CHAR_SIZE) +@@ -102,12 +105,48 @@ ENTRY_P2ALIGN (MEMCMP, 6) + movl %edx, %edx + # endif + cmp $CHAR_PER_VEC, %RDX_LP +- jb L(less_vec) ++ /* Fall through for [0, VEC_SIZE] as its the hottest. */ ++ ja L(more_1x_vec) ++ ++ /* Create mask for CHAR's we want to compare. This allows us to ++ avoid having to include page cross logic. */ ++ movl $-1, %ecx ++ bzhil %edx, %ecx, %ecx ++ kmovd %ecx, %k2 ++ ++ /* Safe to load full ymm with mask. */ ++ VMOVU_MASK (%rsi), %YMM2{%k2} ++ VPCMP $4,(%rdi), %YMM2, %k1{%k2} ++ kmovd %k1, %eax ++ testl %eax, %eax ++ jnz L(return_vec_0) ++ ret + ++ .p2align 4 ++L(return_vec_0): ++ tzcntl %eax, %eax ++# ifdef USE_AS_WMEMCMP ++ movl (%rdi, %rax, CHAR_SIZE), %ecx ++ xorl %edx, %edx ++ cmpl (%rsi, %rax, CHAR_SIZE), %ecx ++ /* NB: no partial register stall here because xorl zero idiom ++ above. */ ++ setg %dl ++ leal -1(%rdx, %rdx), %eax ++# else ++ movzbl (%rsi, %rax), %ecx ++ movzbl (%rdi, %rax), %eax ++ subl %ecx, %eax ++# endif ++ ret ++ ++ ++ .p2align 4 ++L(more_1x_vec): + /* From VEC to 2 * VEC. 
No branch when size == VEC_SIZE. */ + VMOVU (%rsi), %YMM1 + /* Use compare not equals to directly check for mismatch. */ +- VPCMP $4, (%rdi), %YMM1, %k1 ++ VPCMP $4,(%rdi), %YMM1, %k1 + kmovd %k1, %eax + /* NB: eax must be destination register if going to + L(return_vec_[0,2]). For L(return_vec_3) destination register +@@ -131,13 +170,13 @@ ENTRY_P2ALIGN (MEMCMP, 6) + + /* Check third and fourth VEC no matter what. */ + VMOVU (VEC_SIZE * 2)(%rsi), %YMM3 +- VPCMP $4, (VEC_SIZE * 2)(%rdi), %YMM3, %k1 ++ VPCMP $4,(VEC_SIZE * 2)(%rdi), %YMM3, %k1 + kmovd %k1, %eax + testl %eax, %eax + jnz L(return_vec_2) + + VMOVU (VEC_SIZE * 3)(%rsi), %YMM4 +- VPCMP $4, (VEC_SIZE * 3)(%rdi), %YMM4, %k1 ++ VPCMP $4,(VEC_SIZE * 3)(%rdi), %YMM4, %k1 + kmovd %k1, %ecx + testl %ecx, %ecx + jnz L(return_vec_3) +@@ -169,7 +208,7 @@ ENTRY_P2ALIGN (MEMCMP, 6) + VMOVU (VEC_SIZE * 3)(%rsi), %YMM4 + /* Ternary logic to xor (VEC_SIZE * 3)(%rdi) with YMM4 while + oring with YMM1. Result is stored in YMM4. */ +- vpternlogd $0xde, (VEC_SIZE * 3)(%rdi), %YMM1, %YMM4 ++ vpternlogd $0xde,(VEC_SIZE * 3)(%rdi), %YMM1, %YMM4 + + /* Or together YMM2, YMM3, and YMM4 into YMM4. */ + vpternlogd $0xfe, %YMM2, %YMM3, %YMM4 +@@ -184,7 +223,8 @@ ENTRY_P2ALIGN (MEMCMP, 6) + /* NB: eax must be zero to reach here. */ + ret + +- .p2align 4 ++ ++ .p2align 4,, 8 + L(8x_end_return_vec_0_1_2_3): + movq %rdx, %rdi + L(8x_return_vec_0_1_2_3): +@@ -222,23 +262,6 @@ L(return_vec_3): + # endif + ret + +- .p2align 4 +-L(return_vec_0): +- tzcntl %eax, %eax +-# ifdef USE_AS_WMEMCMP +- movl (%rdi, %rax, CHAR_SIZE), %ecx +- xorl %edx, %edx +- cmpl (%rsi, %rax, CHAR_SIZE), %ecx +- /* NB: no partial register stall here because xorl zero idiom +- above. */ +- setg %dl +- leal -1(%rdx, %rdx), %eax +-# else +- movzbl (%rsi, %rax), %ecx +- movzbl (%rdi, %rax), %eax +- subl %ecx, %eax +-# endif +- ret + + .p2align 4 + L(return_vec_1): +@@ -297,7 +320,7 @@ L(loop_4x_vec): + VMOVU (VEC_SIZE * 2)(%rsi, %rdi), %YMM3 + vpxorq (VEC_SIZE * 2)(%rdi), %YMM3, %YMM3 + VMOVU (VEC_SIZE * 3)(%rsi, %rdi), %YMM4 +- vpternlogd $0xde, (VEC_SIZE * 3)(%rdi), %YMM1, %YMM4 ++ vpternlogd $0xde,(VEC_SIZE * 3)(%rdi), %YMM1, %YMM4 + vpternlogd $0xfe, %YMM2, %YMM3, %YMM4 + VPTEST %YMM4, %YMM4, %k1 + kmovd %k1, %ecx +@@ -324,7 +347,7 @@ L(loop_4x_vec): + VMOVU VEC_SIZE(%rsi, %rdx), %YMM2 + vpxorq VEC_SIZE(%rdx), %YMM2, %YMM2 + VMOVU (VEC_SIZE * 3)(%rsi, %rdx), %YMM4 +- vpternlogd $0xde, (VEC_SIZE * 3)(%rdx), %YMM1, %YMM4 ++ vpternlogd $0xde,(VEC_SIZE * 3)(%rdx), %YMM1, %YMM4 + vpternlogd $0xfe, %YMM2, %YMM3, %YMM4 + VPTEST %YMM4, %YMM4, %k1 + kmovd %k1, %ecx +@@ -336,14 +359,14 @@ L(loop_4x_vec): + /* Only entry is from L(more_8x_vec). */ + .p2align 4,, 10 + L(8x_last_2x_vec): +- VPCMP $4, (VEC_SIZE * 2)(%rdx), %YMM3, %k1 ++ VPCMP $4,(VEC_SIZE * 2)(%rdx), %YMM3, %k1 + kmovd %k1, %eax + testl %eax, %eax + jnz L(8x_return_vec_2) + /* Naturally aligned to 16 bytes. */ + L(8x_last_1x_vec): + VMOVU (VEC_SIZE * 3)(%rsi, %rdx), %YMM1 +- VPCMP $4, (VEC_SIZE * 3)(%rdx), %YMM1, %k1 ++ VPCMP $4,(VEC_SIZE * 3)(%rdx), %YMM1, %k1 + kmovd %k1, %eax + testl %eax, %eax + jnz L(8x_return_vec_3) +@@ -392,7 +415,9 @@ L(last_1x_vec): + jnz L(return_vec_0_end) + ret + +- .p2align 4,, 10 ++ ++ /* Don't align. Takes 2-fetch blocks either way and aligning ++ will cause code to spill into another cacheline. */ + L(return_vec_1_end): + /* Use bsf to save code size. This is necessary to have + L(one_or_less) fit in aligning bytes between. 
*/ +@@ -411,31 +436,8 @@ L(return_vec_1_end): + # endif + ret + +- /* NB: L(one_or_less) fits in alignment padding between +- L(return_vec_1_end) and L(return_vec_0_end). */ +-# ifdef USE_AS_WMEMCMP +-L(one_or_less): +- jb L(zero) +- movl (%rdi), %ecx +- xorl %edx, %edx +- cmpl (%rsi), %ecx +- je L(zero) +- setg %dl +- leal -1(%rdx, %rdx), %eax +- ret +-# else +-L(one_or_less): +- jb L(zero) +- movzbl (%rsi), %ecx +- movzbl (%rdi), %eax +- subl %ecx, %eax +- ret +-# endif +-L(zero): +- xorl %eax, %eax +- ret +- +- .p2align 4 ++ /* Don't align. Takes 2-fetch blocks either way and aligning ++ will cause code to spill into another cacheline. */ + L(return_vec_0_end): + tzcntl %eax, %eax + addl %edx, %eax +@@ -451,146 +453,7 @@ L(return_vec_0_end): + subl %ecx, %eax + # endif + ret ++ /* 1-byte until next cache line. */ + +- .p2align 4 +-L(less_vec): +- /* Check if one or less CHAR. This is necessary for size == 0 +- but is also faster for size == CHAR_SIZE. */ +- cmpl $1, %edx +- jbe L(one_or_less) +- +- /* Check if loading one VEC from either s1 or s2 could cause a +- page cross. This can have false positives but is by far the +- fastest method. */ +- movl %edi, %eax +- orl %esi, %eax +- andl $(PAGE_SIZE - 1), %eax +- cmpl $(PAGE_SIZE - VEC_SIZE), %eax +- jg L(page_cross_less_vec) +- +- /* No page cross possible. */ +- VMOVU (%rsi), %YMM2 +- VPCMP $4, (%rdi), %YMM2, %k1 +- kmovd %k1, %eax +- /* Check if any matches where in bounds. Intentionally not +- storing result in eax to limit dependency chain if it goes to +- L(return_vec_0_lv). */ +- bzhil %edx, %eax, %edx +- jnz L(return_vec_0_lv) +- xorl %eax, %eax +- ret +- +- /* Essentially duplicate of L(return_vec_0). Ends up not costing +- any code as shrinks L(less_vec) by allowing 2-byte encoding of +- the jump and ends up fitting in aligning bytes. As well fits on +- same cache line as L(less_vec) so also saves a line from having +- to be fetched on cold calls to memcmp. */ +- .p2align 4,, 4 +-L(return_vec_0_lv): +- tzcntl %eax, %eax +-# ifdef USE_AS_WMEMCMP +- movl (%rdi, %rax, CHAR_SIZE), %ecx +- xorl %edx, %edx +- cmpl (%rsi, %rax, CHAR_SIZE), %ecx +- /* NB: no partial register stall here because xorl zero idiom +- above. */ +- setg %dl +- leal -1(%rdx, %rdx), %eax +-# else +- movzbl (%rsi, %rax), %ecx +- movzbl (%rdi, %rax), %eax +- subl %ecx, %eax +-# endif +- ret +- +- .p2align 4 +-L(page_cross_less_vec): +- /* if USE_AS_WMEMCMP it can only be 0, 4, 8, 12, 16, 20, 24, 28 +- bytes. */ +- cmpl $(16 / CHAR_SIZE), %edx +- jae L(between_16_31) +-# ifndef USE_AS_WMEMCMP +- cmpl $8, %edx +- jae L(between_8_15) +- cmpl $4, %edx +- jb L(between_2_3) +- +- /* Load as big endian with overlapping movbe to avoid branches. +- */ +- movbe (%rdi), %eax +- movbe (%rsi), %ecx +- shlq $32, %rax +- shlq $32, %rcx +- movbe -4(%rdi, %rdx), %edi +- movbe -4(%rsi, %rdx), %esi +- orq %rdi, %rax +- orq %rsi, %rcx +- subq %rcx, %rax +- /* edx is guranteed to be positive int32 in range [4, 7]. */ +- cmovne %edx, %eax +- /* ecx is -1 if rcx > rax. Otherwise 0. */ +- sbbl %ecx, %ecx +- /* If rcx > rax, then ecx is 0 and eax is positive. If rcx == +- rax then eax and ecx are zero. If rax < rax then ecx is -1 so +- eax doesn't matter. */ +- orl %ecx, %eax +- ret +- +- .p2align 4,, 8 +-L(between_8_15): +-# endif +- /* If USE_AS_WMEMCMP fall through into 8-15 byte case. */ +- vmovq (%rdi), %xmm1 +- vmovq (%rsi), %xmm2 +- VPCMP $4, %xmm1, %xmm2, %k1 +- kmovd %k1, %eax +- testl %eax, %eax +- jnz L(return_vec_0_lv) +- /* Use overlapping loads to avoid branches. 
*/ +- vmovq -8(%rdi, %rdx, CHAR_SIZE), %xmm1 +- vmovq -8(%rsi, %rdx, CHAR_SIZE), %xmm2 +- VPCMP $4, %xmm1, %xmm2, %k1 +- addl $(CHAR_PER_VEC - (8 / CHAR_SIZE)), %edx +- kmovd %k1, %eax +- testl %eax, %eax +- jnz L(return_vec_0_end) +- ret +- +- .p2align 4,, 8 +-L(between_16_31): +- /* From 16 to 31 bytes. No branch when size == 16. */ +- +- /* Use movups to save code size. */ +- vmovdqu (%rsi), %xmm2 +- VPCMP $4, (%rdi), %xmm2, %k1 +- kmovd %k1, %eax +- testl %eax, %eax +- jnz L(return_vec_0_lv) +- /* Use overlapping loads to avoid branches. */ +- vmovdqu -16(%rsi, %rdx, CHAR_SIZE), %xmm2 +- VPCMP $4, -16(%rdi, %rdx, CHAR_SIZE), %xmm2, %k1 +- addl $(CHAR_PER_VEC - (16 / CHAR_SIZE)), %edx +- kmovd %k1, %eax +- testl %eax, %eax +- jnz L(return_vec_0_end) +- ret +- +-# ifndef USE_AS_WMEMCMP +-L(between_2_3): +- /* Load as big endian to avoid branches. */ +- movzwl (%rdi), %eax +- movzwl (%rsi), %ecx +- shll $8, %eax +- shll $8, %ecx +- bswap %eax +- bswap %ecx +- movzbl -1(%rdi, %rdx), %edi +- movzbl -1(%rsi, %rdx), %esi +- orl %edi, %eax +- orl %esi, %ecx +- /* Subtraction is okay because the upper 8 bits are zero. */ +- subl %ecx, %eax +- ret +-# endif + END (MEMCMP) + #endif diff --git a/SOURCES/glibc-upstream-2.34-187.patch b/SOURCES/glibc-upstream-2.34-187.patch new file mode 100644 index 0000000..6186aeb --- /dev/null +++ b/SOURCES/glibc-upstream-2.34-187.patch @@ -0,0 +1,42 @@ +commit 9681691402052b727e01ae3375c73e0f76566593 +Author: Adhemerval Zanella +Date: Wed Apr 27 13:59:26 2022 -0300 + + linux: Fix missing internal 64 bit time_t stat usage + + These are two missing spots initially done by 52a5fe70a2c77935. + + Checked on i686-linux-gnu. + + (cherry picked from commit 834ddd0432f68d6dc85b6aac95065721af0d86e9) + +diff --git a/sysdeps/unix/sysv/linux/faccessat.c b/sysdeps/unix/sysv/linux/faccessat.c +index 13160d32499c4e58..00e4ce7f80ee2dfe 100644 +--- a/sysdeps/unix/sysv/linux/faccessat.c ++++ b/sysdeps/unix/sysv/linux/faccessat.c +@@ -39,8 +39,8 @@ __faccessat (int fd, const char *file, int mode, int flag) + if ((flag == 0 || ((flag & ~AT_EACCESS) == 0 && ! __libc_enable_secure))) + return INLINE_SYSCALL (faccessat, 3, fd, file, mode); + +- struct stat64 stats; +- if (__fstatat64 (fd, file, &stats, flag & AT_SYMLINK_NOFOLLOW)) ++ struct __stat64_t64 stats; ++ if (__fstatat64_time64 (fd, file, &stats, flag & AT_SYMLINK_NOFOLLOW)) + return -1; + + mode &= (X_OK | W_OK | R_OK); /* Clear any bogus bits. 
*/ +diff --git a/sysdeps/unix/sysv/linux/pathconf.c b/sysdeps/unix/sysv/linux/pathconf.c +index b599a66c930cad4d..f79930303118ebcd 100644 +--- a/sysdeps/unix/sysv/linux/pathconf.c ++++ b/sysdeps/unix/sysv/linux/pathconf.c +@@ -110,8 +110,8 @@ distinguish_extX (const struct statfs *fsbuf, const char *file, int fd) + && strcmp (mntbuf.mnt_type, "ext4") != 0) + continue; + +- struct stat64 fsst; +- if (__stat64 (mntbuf.mnt_dir, &fsst) >= 0 ++ struct __stat64_t64 fsst; ++ if (__stat64_time64 (mntbuf.mnt_dir, &fsst) >= 0 + && st.st_dev == fsst.st_dev) + { + if (strcmp (mntbuf.mnt_type, "ext4") == 0) diff --git a/SOURCES/glibc-upstream-2.34-188.patch b/SOURCES/glibc-upstream-2.34-188.patch new file mode 100644 index 0000000..8b49369 --- /dev/null +++ b/SOURCES/glibc-upstream-2.34-188.patch @@ -0,0 +1,39 @@ +commit 55640ed3fde48360a8e8083be4843bd2dc7cecfe +Author: Carlos O'Donell +Date: Tue Apr 26 10:52:41 2022 -0400 + + i386: Regenerate ulps + + These failures were caught while building glibc master for Fedora + Rawhide which is built with '-mtune=generic -msse2 -mfpmath=sse' + using gcc 11.3 (gcc-11.3.1-2.fc35) on a Cascadelake Intel Xeon + processor. + + (cherry picked from commit e465d97653311c3687aee49de782177353acfe86) + +diff --git a/sysdeps/i386/fpu/libm-test-ulps b/sysdeps/i386/fpu/libm-test-ulps +index 7601049110789201..84e6686eba5fe79a 100644 +--- a/sysdeps/i386/fpu/libm-test-ulps ++++ b/sysdeps/i386/fpu/libm-test-ulps +@@ -668,7 +668,7 @@ ldouble: 4 + + Function: Imaginary part of "clog10": + double: 2 +-float: 1 ++float: 2 + float128: 2 + ldouble: 2 + +diff --git a/sysdeps/i386/i686/fpu/multiarch/libm-test-ulps b/sysdeps/i386/i686/fpu/multiarch/libm-test-ulps +index a39c89cec1141935..cc21e6907fe8b6a3 100644 +--- a/sysdeps/i386/i686/fpu/multiarch/libm-test-ulps ++++ b/sysdeps/i386/i686/fpu/multiarch/libm-test-ulps +@@ -668,7 +668,7 @@ ldouble: 4 + + Function: Imaginary part of "clog10": + double: 2 +-float: 1 ++float: 2 + float128: 2 + ldouble: 2 + diff --git a/SOURCES/glibc-upstream-2.34-189.patch b/SOURCES/glibc-upstream-2.34-189.patch new file mode 100644 index 0000000..3a5889c --- /dev/null +++ b/SOURCES/glibc-upstream-2.34-189.patch @@ -0,0 +1,116 @@ +commit 88a8637cb4658cd91a002659db05867716b88b36 +Author: Adhemerval Zanella +Date: Wed Apr 27 13:40:30 2022 -0300 + + linux: Fix fchmodat with AT_SYMLINK_NOFOLLOW for 64 bit time_t (BZ#29097) + + The AT_SYMLINK_NOFOLLOW emulation ues the default 32 bit stat internal + calls, which fails with EOVERFLOW if the file constains timestamps + beyond 2038. + + Checked on i686-linux-gnu. + + (cherry picked from commit 118a2aee07f64d605b6668cbe195c1f44eac6be6) + +diff --git a/io/Makefile b/io/Makefile +index 9871ecbc74020a6d..01968b81042e01e4 100644 +--- a/io/Makefile ++++ b/io/Makefile +@@ -81,16 +81,17 @@ tests := test-utime test-stat test-stat2 test-lfs tst-getcwd \ + tst-closefrom \ + + tests-time64 := \ ++ tst-fcntl-time64 \ ++ tst-fts-time64 \ + tst-futimens-time64 \ + tst-futimes-time64\ +- tst-fts-time64 \ ++ tst-futimesat-time64 \ ++ tst-lchmod-time64 \ + tst-lutimes-time64 \ + tst-stat-time64 \ +- tst-futimesat-time64 \ + tst-utime-time64 \ + tst-utimensat-time64 \ + tst-utimes-time64 \ +- tst-fcntl-time64 \ + # tests-time64 + + # Likewise for statx, but we do not need static linking here. 
+@@ -134,6 +135,7 @@ CFLAGS-close.c += -fexceptions -fasynchronous-unwind-tables + + CFLAGS-test-stat.c += -D_FILE_OFFSET_BITS=64 -D_LARGEFILE64_SOURCE + CFLAGS-test-lfs.c += -D_LARGEFILE64_SOURCE ++CFLAGS-tst-lchmod.c += -D_FILE_OFFSET_BITS=64 + + test-stat2-ARGS = Makefile . $(objpfx)test-stat2 + +diff --git a/io/tst-lchmod-time64.c b/io/tst-lchmod-time64.c +new file mode 100644 +index 0000000000000000..f2b7cc9d358f2a77 +--- /dev/null ++++ b/io/tst-lchmod-time64.c +@@ -0,0 +1,2 @@ ++#define CHECK_TIME64 ++#include "tst-lchmod.c" +diff --git a/io/tst-lchmod.c b/io/tst-lchmod.c +index 0fe98e01b74b713d..472766b186975922 100644 +--- a/io/tst-lchmod.c ++++ b/io/tst-lchmod.c +@@ -66,10 +66,27 @@ select_path (bool do_relative_path, const char *full_path, const char *relative_ + return full_path; + } + ++static void ++update_file_time_to_y2038 (const char *fname, int flags) ++{ ++#ifdef CHECK_TIME64 ++ /* Y2038 threshold plus 1 second. */ ++ const struct timespec ts[] = { { 0x80000001LL, 0}, { 0x80000001LL } }; ++ TEST_VERIFY_EXIT (utimensat (AT_FDCWD, fname, ts, flags) == 0); ++#endif ++} ++ + static void + test_1 (bool do_relative_path, int (*chmod_func) (int fd, const char *, mode_t, int)) + { + char *tempdir = support_create_temp_directory ("tst-lchmod-"); ++#ifdef CHECK_TIME64 ++ if (!support_path_support_time64 (tempdir)) ++ { ++ puts ("info: test skipped, filesystem does not support 64 bit time_t"); ++ return; ++ } ++#endif + + char *path_dangling = xasprintf ("%s/dangling", tempdir); + char *path_file = xasprintf ("%s/file", tempdir); +@@ -93,9 +110,12 @@ test_1 (bool do_relative_path, int (*chmod_func) (int fd, const char *, mode_t, + xsymlink ("loop", path_loop); + xsymlink ("target-does-not-exist", path_dangling); + ++ update_file_time_to_y2038 (path_file, 0); ++ update_file_time_to_y2038 (path_to_file, AT_SYMLINK_NOFOLLOW); ++ + /* Check that the modes do not collide with what we will use in the + test. */ +- struct stat64 st; ++ struct stat st; + xstat (path_file, &st); + TEST_VERIFY ((st.st_mode & 0777) != 1); + xlstat (path_to_file, &st); +diff --git a/sysdeps/unix/sysv/linux/fchmodat.c b/sysdeps/unix/sysv/linux/fchmodat.c +index 5bd1eb96a5d78130..b0cf61949a9302d9 100644 +--- a/sysdeps/unix/sysv/linux/fchmodat.c ++++ b/sysdeps/unix/sysv/linux/fchmodat.c +@@ -48,8 +48,8 @@ fchmodat (int fd, const char *file, mode_t mode, int flag) + + /* Use fstatat because fstat does not work on O_PATH descriptors + before Linux 3.6. */ +- struct stat64 st; +- if (__fstatat64 (pathfd, "", &st, AT_EMPTY_PATH) != 0) ++ struct __stat64_t64 st; ++ if (__fstatat64_time64 (pathfd, "", &st, AT_EMPTY_PATH) != 0) + { + __close_nocancel (pathfd); + return -1; diff --git a/SOURCES/glibc-upstream-2.34-190.patch b/SOURCES/glibc-upstream-2.34-190.patch new file mode 100644 index 0000000..f21a4bf --- /dev/null +++ b/SOURCES/glibc-upstream-2.34-190.patch @@ -0,0 +1,189 @@ +commit c66c92181ddbd82306537a608e8c0282587131de +Author: DJ Delorie +Date: Wed Mar 30 17:44:02 2022 -0400 + + posix/glob.c: update from gnulib + + Copied from gnulib/lib/glob.c in order to fix rhbz 1982608 + Also fixes swbz 25659 + + Reviewed-by: Carlos O'Donell + Tested-by: Carlos O'Donell + (cherry picked from commit 7c477b57a31487eda516db02b9e04f22d1a6e6af) + +diff --git a/posix/glob.c b/posix/glob.c +index 593a4c358f3d42e5..6af310a1aa31401a 100644 +--- a/posix/glob.c ++++ b/posix/glob.c +@@ -21,13 +21,14 @@ + optimizes away the pattern == NULL test below. 
*/ + # define _GL_ARG_NONNULL(params) + +-# include ++# include + + #endif + + #include + + #include ++#include + #include + #include + #include +@@ -56,6 +57,8 @@ + # define sysconf(id) __sysconf (id) + # define closedir(dir) __closedir (dir) + # define opendir(name) __opendir (name) ++# undef dirfd ++# define dirfd(str) __dirfd (str) + # define readdir(str) __readdir64 (str) + # define getpwnam_r(name, bufp, buf, len, res) \ + __getpwnam_r (name, bufp, buf, len, res) +@@ -69,11 +72,8 @@ + # ifndef GLOB_LSTAT + # define GLOB_LSTAT gl_lstat + # endif +-# ifndef GLOB_STAT64 +-# define GLOB_STAT64 __stat64 +-# endif +-# ifndef GLOB_LSTAT64 +-# define GLOB_LSTAT64 __lstat64 ++# ifndef GLOB_FSTATAT64 ++# define GLOB_FSTATAT64 __fstatat64 + # endif + # include + #else /* !_LIBC */ +@@ -88,8 +88,7 @@ + # define struct_stat struct stat + # define struct_stat64 struct stat + # define GLOB_LSTAT gl_lstat +-# define GLOB_STAT64 stat +-# define GLOB_LSTAT64 lstat ++# define GLOB_FSTATAT64 fstatat + #endif /* _LIBC */ + + #include +@@ -215,7 +214,8 @@ glob_lstat (glob_t *pglob, int flags, const char *fullname) + } ust; + return (__glibc_unlikely (flags & GLOB_ALTDIRFUNC) + ? pglob->GLOB_LSTAT (fullname, &ust.st) +- : GLOB_LSTAT64 (fullname, &ust.st64)); ++ : GLOB_FSTATAT64 (AT_FDCWD, fullname, &ust.st64, ++ AT_SYMLINK_NOFOLLOW)); + } + + /* Set *R = A + B. Return true if the answer is mathematically +@@ -257,7 +257,8 @@ is_dir (char const *filename, int flags, glob_t const *pglob) + struct_stat64 st64; + return (__glibc_unlikely (flags & GLOB_ALTDIRFUNC) + ? pglob->gl_stat (filename, &st) == 0 && S_ISDIR (st.st_mode) +- : GLOB_STAT64 (filename, &st64) == 0 && S_ISDIR (st64.st_mode)); ++ : (GLOB_FSTATAT64 (AT_FDCWD, filename, &st64, 0) == 0 ++ && S_ISDIR (st64.st_mode))); + } + + /* Find the end of the sub-pattern in a brace expression. */ +@@ -747,6 +748,8 @@ __glob (const char *pattern, int flags, int (*errfunc) (const char *, int), + else + { + #ifndef WINDOWS32 ++ /* Recognize ~user as a shorthand for the specified user's home ++ directory. */ + char *end_name = strchr (dirname, '/'); + char *user_name; + int malloc_user_name = 0; +@@ -885,7 +888,22 @@ __glob (const char *pattern, int flags, int (*errfunc) (const char *, int), + } + scratch_buffer_free (&pwtmpbuf); + } +-#endif /* !WINDOWS32 */ ++#else /* WINDOWS32 */ ++ /* On native Windows, access to a user's home directory ++ (via GetUserProfileDirectory) or to a user's environment ++ variables (via ExpandEnvironmentStringsForUser) requires ++ the credentials of the user. Therefore we cannot support ++ the ~user syntax on this platform. ++ Handling ~user specially (and treat it like plain ~) if ++ user is getenv ("USERNAME") would not be a good idea, ++ since it would make people think that ~user is supported ++ in general. */ ++ if (flags & GLOB_TILDE_CHECK) ++ { ++ retval = GLOB_NOMATCH; ++ goto out; ++ } ++#endif /* WINDOWS32 */ + } + } + +@@ -1266,6 +1284,8 @@ glob_in_dir (const char *pattern, const char *directory, int flags, + { + size_t dirlen = strlen (directory); + void *stream = NULL; ++ struct scratch_buffer s; ++ scratch_buffer_init (&s); + # define GLOBNAMES_MEMBERS(nnames) \ + struct globnames *next; size_t count; char *name[nnames]; + struct globnames { GLOBNAMES_MEMBERS (FLEXIBLE_ARRAY_MEMBER) }; +@@ -1337,6 +1357,7 @@ glob_in_dir (const char *pattern, const char *directory, int flags, + } + else + { ++ int dfd = dirfd (stream); + int fnm_flags = ((!(flags & GLOB_PERIOD) ? FNM_PERIOD : 0) + | ((flags & GLOB_NOESCAPE) ? 
FNM_NOESCAPE : 0)); + flags |= GLOB_MAGCHAR; +@@ -1364,8 +1385,32 @@ glob_in_dir (const char *pattern, const char *directory, int flags, + if (flags & GLOB_ONLYDIR) + switch (readdir_result_type (d)) + { +- case DT_DIR: case DT_LNK: case DT_UNKNOWN: break; + default: continue; ++ case DT_DIR: break; ++ case DT_LNK: case DT_UNKNOWN: ++ /* The filesystem was too lazy to give us a hint, ++ so we have to do it the hard way. */ ++ if (__glibc_unlikely (dfd < 0 || flags & GLOB_ALTDIRFUNC)) ++ { ++ size_t namelen = strlen (d.name); ++ size_t need = dirlen + 1 + namelen + 1; ++ if (s.length < need ++ && !scratch_buffer_set_array_size (&s, need, 1)) ++ goto memory_error; ++ char *p = mempcpy (s.data, directory, dirlen); ++ *p = '/'; ++ p += p[-1] != '/'; ++ memcpy (p, d.name, namelen + 1); ++ if (! is_dir (s.data, flags, pglob)) ++ continue; ++ } ++ else ++ { ++ struct_stat64 st64; ++ if (! (GLOB_FSTATAT64 (dfd, d.name, &st64, 0) == 0 ++ && S_ISDIR (st64.st_mode))) ++ continue; ++ } + } + + if (fnmatch (pattern, d.name, fnm_flags) == 0) +@@ -1497,5 +1542,6 @@ glob_in_dir (const char *pattern, const char *directory, int flags, + __set_errno (save); + } + ++ scratch_buffer_free (&s); + return result; + } +diff --git a/sysdeps/unix/sysv/linux/glob64-time64.c b/sysdeps/unix/sysv/linux/glob64-time64.c +index a465f70905e5a8a3..95efe4c4f4624967 100644 +--- a/sysdeps/unix/sysv/linux/glob64-time64.c ++++ b/sysdeps/unix/sysv/linux/glob64-time64.c +@@ -37,6 +37,7 @@ + # define GLOB_LSTAT gl_lstat + # define GLOB_STAT64 __stat64_time64 + # define GLOB_LSTAT64 __lstat64_time64 ++# define GLOB_FSTATAT64 __fstatat64_time64 + + # define COMPILE_GLOB64 1 + diff --git a/SOURCES/glibc-upstream-2.34-191.patch b/SOURCES/glibc-upstream-2.34-191.patch new file mode 100644 index 0000000..55b6a81 --- /dev/null +++ b/SOURCES/glibc-upstream-2.34-191.patch @@ -0,0 +1,35 @@ +commit bc6fba3c8048b11c9f73db03339c97a2fec3f0cf +Author: Joseph Myers +Date: Wed Nov 17 14:25:16 2021 +0000 + + Add PF_MCTP, AF_MCTP from Linux 5.15 to bits/socket.h + + Linux 5.15 adds a new address / protocol family PF_MCTP / AF_MCTP; add + these constants to bits/socket.h. + + Tested for x86_64. + + (cherry picked from commit bdeb7a8fa9989d18dab6310753d04d908125dc1d) + +diff --git a/sysdeps/unix/sysv/linux/bits/socket.h b/sysdeps/unix/sysv/linux/bits/socket.h +index a011a8c0959b9970..7bb9e863d7329da9 100644 +--- a/sysdeps/unix/sysv/linux/bits/socket.h ++++ b/sysdeps/unix/sysv/linux/bits/socket.h +@@ -86,7 +86,8 @@ typedef __socklen_t socklen_t; + #define PF_QIPCRTR 42 /* Qualcomm IPC Router. */ + #define PF_SMC 43 /* SMC sockets. */ + #define PF_XDP 44 /* XDP sockets. */ +-#define PF_MAX 45 /* For now.. */ ++#define PF_MCTP 45 /* Management component transport protocol. */ ++#define PF_MAX 46 /* For now.. */ + + /* Address families. */ + #define AF_UNSPEC PF_UNSPEC +@@ -137,6 +138,7 @@ typedef __socklen_t socklen_t; + #define AF_QIPCRTR PF_QIPCRTR + #define AF_SMC PF_SMC + #define AF_XDP PF_XDP ++#define AF_MCTP PF_MCTP + #define AF_MAX PF_MAX + + /* Socket level values. Others are defined in the appropriate headers. 
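As a quick illustration (a sketch, not part of this patch series), the new address family can be probed from user space; this assumes a Linux 5.15+ kernel and headers carrying the AF_MCTP constant added above, and on older kernels socket() fails with EAFNOSUPPORT:

    #include <sys/socket.h>
    #include <errno.h>
    #include <stdio.h>
    #include <string.h>
    #include <unistd.h>

    int
    main (void)
    {
      /* MCTP endpoints are datagram sockets; AF_MCTP is new in Linux 5.15.  */
      int fd = socket (AF_MCTP, SOCK_DGRAM, 0);
      if (fd < 0)
        {
          /* EAFNOSUPPORT simply means the running kernel predates MCTP.  */
          printf ("AF_MCTP not available: %s\n", strerror (errno));
          return 1;
        }
      puts ("AF_MCTP socket created");
      close (fd);
      return 0;
    }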
diff --git a/SOURCES/glibc-upstream-2.34-192.patch b/SOURCES/glibc-upstream-2.34-192.patch new file mode 100644 index 0000000..5a89460 --- /dev/null +++ b/SOURCES/glibc-upstream-2.34-192.patch @@ -0,0 +1,27 @@ +commit fd5dbfd1cd98cb2f12f9e9f7004a4d25ab0c977f +Author: Joseph Myers +Date: Mon Nov 22 15:30:12 2021 +0000 + + Update kernel version to 5.15 in tst-mman-consts.py + + This patch updates the kernel version in the test tst-mman-consts.py + to 5.15. (There are no new MAP_* constants covered by this test in + 5.15 that need any other header changes.) + + Tested with build-many-glibcs.py. + + (cherry picked from commit 5c3ece451d46a7d8721311609bfcb6faafacb39e) + +diff --git a/sysdeps/unix/sysv/linux/tst-mman-consts.py b/sysdeps/unix/sysv/linux/tst-mman-consts.py +index 810433c238f31c25..eeccdfd04dae57ab 100644 +--- a/sysdeps/unix/sysv/linux/tst-mman-consts.py ++++ b/sysdeps/unix/sysv/linux/tst-mman-consts.py +@@ -33,7 +33,7 @@ def main(): + help='C compiler (including options) to use') + args = parser.parse_args() + linux_version_headers = glibcsyscalls.linux_kernel_version(args.cc) +- linux_version_glibc = (5, 14) ++ linux_version_glibc = (5, 15) + sys.exit(glibcextract.compare_macro_consts( + '#define _GNU_SOURCE 1\n' + '#include \n', diff --git a/SOURCES/glibc-upstream-2.34-193.patch b/SOURCES/glibc-upstream-2.34-193.patch new file mode 100644 index 0000000..d056d36 --- /dev/null +++ b/SOURCES/glibc-upstream-2.34-193.patch @@ -0,0 +1,28 @@ +commit 5146b73d72ced9bab125e986aa99ef5fe2f88475 +Author: Joseph Myers +Date: Mon Dec 20 15:38:32 2021 +0000 + + Add ARPHRD_CAN, ARPHRD_MCTP to net/if_arp.h + + Add the constant ARPHRD_MCTP, from Linux 5.15, to net/if_arp.h, along + with ARPHRD_CAN which was added to Linux in version 2.6.25 (commit + cd05acfe65ed2cf2db683fa9a6adb8d35635263b, "[CAN]: Allocate protocol + numbers for PF_CAN") but apparently missed for glibc at the time. + + Tested for x86_64. + + (cherry picked from commit a94d9659cd69dbc70d3494b1cbbbb5a1551675c5) + +diff --git a/sysdeps/unix/sysv/linux/net/if_arp.h b/sysdeps/unix/sysv/linux/net/if_arp.h +index 2a8933cde7cf236d..42910b776660def1 100644 +--- a/sysdeps/unix/sysv/linux/net/if_arp.h ++++ b/sysdeps/unix/sysv/linux/net/if_arp.h +@@ -95,6 +95,8 @@ struct arphdr + #define ARPHRD_ROSE 270 + #define ARPHRD_X25 271 /* CCITT X.25. */ + #define ARPHRD_HWX25 272 /* Boards with X.25 in firmware. */ ++#define ARPHRD_CAN 280 /* Controller Area Network. */ ++#define ARPHRD_MCTP 290 + #define ARPHRD_PPP 512 + #define ARPHRD_CISCO 513 /* Cisco HDLC. */ + #define ARPHRD_HDLC ARPHRD_CISCO diff --git a/SOURCES/glibc-upstream-2.34-194.patch b/SOURCES/glibc-upstream-2.34-194.patch new file mode 100644 index 0000000..0437f53 --- /dev/null +++ b/SOURCES/glibc-upstream-2.34-194.patch @@ -0,0 +1,337 @@ +commit 6af165658d0999ac2c4e9ce88bee020fbc2ee49f +Author: Joseph Myers +Date: Wed Mar 23 17:11:56 2022 +0000 + + Update syscall lists for Linux 5.17 + + Linux 5.17 has one new syscall, set_mempolicy_home_node. Update + syscall-names.list and regenerate the arch-syscall.h headers with + build-many-glibcs.py update-syscalls. + + Tested with build-many-glibcs.py. 
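 + Note that this change only records the syscall number; glibc adds no
 + wrapper, so callers reach set_mempolicy_home_node through syscall(2).
 + A hedged sketch follows (the helper below is hypothetical, 450 is the
 + number used by most ABIs per the generated headers in this patch, and
 + kernels before 5.17 fail with ENOSYS):

    #include <sys/syscall.h>
    #include <unistd.h>

    #ifndef __NR_set_mempolicy_home_node
    # define __NR_set_mempolicy_home_node 450  /* value on most ABIs */
    #endif

    /* Set the preferred "home" NUMA node for the pages in
       [addr, addr + len).  Only meaningful for ranges governed by an
       MPOL_BIND or MPOL_PREFERRED_MANY policy; flags must be 0 in
       Linux 5.17.  Hypothetical wrapper for illustration only.  */
    static long
    set_mempolicy_home_node (void *addr, unsigned long len,
                             unsigned long home_node)
    {
      return syscall (__NR_set_mempolicy_home_node,
                      (unsigned long) addr, len, home_node, 0UL);
    }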
+ + (cherry picked from commit 8ef9196b26793830515402ea95aca2629f7721ec) + +diff --git a/sysdeps/unix/sysv/linux/aarch64/arch-syscall.h b/sysdeps/unix/sysv/linux/aarch64/arch-syscall.h +index 9905ebedf298954c..4fcb6da80af37e9e 100644 +--- a/sysdeps/unix/sysv/linux/aarch64/arch-syscall.h ++++ b/sysdeps/unix/sysv/linux/aarch64/arch-syscall.h +@@ -236,6 +236,7 @@ + #define __NR_sendmsg 211 + #define __NR_sendto 206 + #define __NR_set_mempolicy 237 ++#define __NR_set_mempolicy_home_node 450 + #define __NR_set_robust_list 99 + #define __NR_set_tid_address 96 + #define __NR_setdomainname 162 +diff --git a/sysdeps/unix/sysv/linux/alpha/arch-syscall.h b/sysdeps/unix/sysv/linux/alpha/arch-syscall.h +index ee8085be69958b25..0cf74c1a96bb1235 100644 +--- a/sysdeps/unix/sysv/linux/alpha/arch-syscall.h ++++ b/sysdeps/unix/sysv/linux/alpha/arch-syscall.h +@@ -391,6 +391,7 @@ + #define __NR_sendmsg 114 + #define __NR_sendto 133 + #define __NR_set_mempolicy 431 ++#define __NR_set_mempolicy_home_node 560 + #define __NR_set_robust_list 466 + #define __NR_set_tid_address 411 + #define __NR_setdomainname 166 +diff --git a/sysdeps/unix/sysv/linux/arc/arch-syscall.h b/sysdeps/unix/sysv/linux/arc/arch-syscall.h +index 1b626d97705d545a..c1207aaa12be6a51 100644 +--- a/sysdeps/unix/sysv/linux/arc/arch-syscall.h ++++ b/sysdeps/unix/sysv/linux/arc/arch-syscall.h +@@ -238,6 +238,7 @@ + #define __NR_sendmsg 211 + #define __NR_sendto 206 + #define __NR_set_mempolicy 237 ++#define __NR_set_mempolicy_home_node 450 + #define __NR_set_robust_list 99 + #define __NR_set_tid_address 96 + #define __NR_setdomainname 162 +diff --git a/sysdeps/unix/sysv/linux/arm/arch-syscall.h b/sysdeps/unix/sysv/linux/arm/arch-syscall.h +index 96ef8db9368e7de4..e7ba04c106d8af7d 100644 +--- a/sysdeps/unix/sysv/linux/arm/arch-syscall.h ++++ b/sysdeps/unix/sysv/linux/arm/arch-syscall.h +@@ -302,6 +302,7 @@ + #define __NR_sendmsg 296 + #define __NR_sendto 290 + #define __NR_set_mempolicy 321 ++#define __NR_set_mempolicy_home_node 450 + #define __NR_set_robust_list 338 + #define __NR_set_tid_address 256 + #define __NR_set_tls 983045 +diff --git a/sysdeps/unix/sysv/linux/csky/arch-syscall.h b/sysdeps/unix/sysv/linux/csky/arch-syscall.h +index 96910154ed6a5c1b..dc9383758ebc641b 100644 +--- a/sysdeps/unix/sysv/linux/csky/arch-syscall.h ++++ b/sysdeps/unix/sysv/linux/csky/arch-syscall.h +@@ -250,6 +250,7 @@ + #define __NR_sendmsg 211 + #define __NR_sendto 206 + #define __NR_set_mempolicy 237 ++#define __NR_set_mempolicy_home_node 450 + #define __NR_set_robust_list 99 + #define __NR_set_thread_area 244 + #define __NR_set_tid_address 96 +diff --git a/sysdeps/unix/sysv/linux/hppa/arch-syscall.h b/sysdeps/unix/sysv/linux/hppa/arch-syscall.h +index 36675fd48e6f50c5..767f1287a30b473e 100644 +--- a/sysdeps/unix/sysv/linux/hppa/arch-syscall.h ++++ b/sysdeps/unix/sysv/linux/hppa/arch-syscall.h +@@ -289,6 +289,7 @@ + #define __NR_sendmsg 183 + #define __NR_sendto 82 + #define __NR_set_mempolicy 262 ++#define __NR_set_mempolicy_home_node 450 + #define __NR_set_robust_list 289 + #define __NR_set_tid_address 237 + #define __NR_setdomainname 121 +diff --git a/sysdeps/unix/sysv/linux/i386/arch-syscall.h b/sysdeps/unix/sysv/linux/i386/arch-syscall.h +index c86ccbda4681066c..1998f0d76a444cac 100644 +--- a/sysdeps/unix/sysv/linux/i386/arch-syscall.h ++++ b/sysdeps/unix/sysv/linux/i386/arch-syscall.h +@@ -323,6 +323,7 @@ + #define __NR_sendmsg 370 + #define __NR_sendto 369 + #define __NR_set_mempolicy 276 ++#define __NR_set_mempolicy_home_node 450 + #define 
__NR_set_robust_list 311 + #define __NR_set_thread_area 243 + #define __NR_set_tid_address 258 +diff --git a/sysdeps/unix/sysv/linux/ia64/arch-syscall.h b/sysdeps/unix/sysv/linux/ia64/arch-syscall.h +index d898bce404955ef0..b2eab1b93d70b9de 100644 +--- a/sysdeps/unix/sysv/linux/ia64/arch-syscall.h ++++ b/sysdeps/unix/sysv/linux/ia64/arch-syscall.h +@@ -272,6 +272,7 @@ + #define __NR_sendmsg 1205 + #define __NR_sendto 1199 + #define __NR_set_mempolicy 1261 ++#define __NR_set_mempolicy_home_node 1474 + #define __NR_set_robust_list 1298 + #define __NR_set_tid_address 1233 + #define __NR_setdomainname 1129 +diff --git a/sysdeps/unix/sysv/linux/m68k/arch-syscall.h b/sysdeps/unix/sysv/linux/m68k/arch-syscall.h +index fe721b809076abeb..5fc3723772f92516 100644 +--- a/sysdeps/unix/sysv/linux/m68k/arch-syscall.h ++++ b/sysdeps/unix/sysv/linux/m68k/arch-syscall.h +@@ -310,6 +310,7 @@ + #define __NR_sendmsg 367 + #define __NR_sendto 366 + #define __NR_set_mempolicy 270 ++#define __NR_set_mempolicy_home_node 450 + #define __NR_set_robust_list 304 + #define __NR_set_thread_area 334 + #define __NR_set_tid_address 253 +diff --git a/sysdeps/unix/sysv/linux/microblaze/arch-syscall.h b/sysdeps/unix/sysv/linux/microblaze/arch-syscall.h +index 6e10c3661db96a1e..b6e9b007e496cd80 100644 +--- a/sysdeps/unix/sysv/linux/microblaze/arch-syscall.h ++++ b/sysdeps/unix/sysv/linux/microblaze/arch-syscall.h +@@ -326,6 +326,7 @@ + #define __NR_sendmsg 360 + #define __NR_sendto 353 + #define __NR_set_mempolicy 276 ++#define __NR_set_mempolicy_home_node 450 + #define __NR_set_robust_list 311 + #define __NR_set_thread_area 243 + #define __NR_set_tid_address 258 +diff --git a/sysdeps/unix/sysv/linux/mips/mips32/arch-syscall.h b/sysdeps/unix/sysv/linux/mips/mips32/arch-syscall.h +index 26a6d594a2222f15..b3a3871f8ab8a23e 100644 +--- a/sysdeps/unix/sysv/linux/mips/mips32/arch-syscall.h ++++ b/sysdeps/unix/sysv/linux/mips/mips32/arch-syscall.h +@@ -308,6 +308,7 @@ + #define __NR_sendmsg 4179 + #define __NR_sendto 4180 + #define __NR_set_mempolicy 4270 ++#define __NR_set_mempolicy_home_node 4450 + #define __NR_set_robust_list 4309 + #define __NR_set_thread_area 4283 + #define __NR_set_tid_address 4252 +diff --git a/sysdeps/unix/sysv/linux/mips/mips64/n32/arch-syscall.h b/sysdeps/unix/sysv/linux/mips/mips64/n32/arch-syscall.h +index 83e0d49c5e3ca1bc..b462182723aff286 100644 +--- a/sysdeps/unix/sysv/linux/mips/mips64/n32/arch-syscall.h ++++ b/sysdeps/unix/sysv/linux/mips/mips64/n32/arch-syscall.h +@@ -288,6 +288,7 @@ + #define __NR_sendmsg 6045 + #define __NR_sendto 6043 + #define __NR_set_mempolicy 6233 ++#define __NR_set_mempolicy_home_node 6450 + #define __NR_set_robust_list 6272 + #define __NR_set_thread_area 6246 + #define __NR_set_tid_address 6213 +diff --git a/sysdeps/unix/sysv/linux/mips/mips64/n64/arch-syscall.h b/sysdeps/unix/sysv/linux/mips/mips64/n64/arch-syscall.h +index d6747c542f63202b..a9d6b94572e93001 100644 +--- a/sysdeps/unix/sysv/linux/mips/mips64/n64/arch-syscall.h ++++ b/sysdeps/unix/sysv/linux/mips/mips64/n64/arch-syscall.h +@@ -270,6 +270,7 @@ + #define __NR_sendmsg 5045 + #define __NR_sendto 5043 + #define __NR_set_mempolicy 5229 ++#define __NR_set_mempolicy_home_node 5450 + #define __NR_set_robust_list 5268 + #define __NR_set_thread_area 5242 + #define __NR_set_tid_address 5212 +diff --git a/sysdeps/unix/sysv/linux/nios2/arch-syscall.h b/sysdeps/unix/sysv/linux/nios2/arch-syscall.h +index 4ee209bc4475ea7d..809a219ef32a45ef 100644 +--- a/sysdeps/unix/sysv/linux/nios2/arch-syscall.h ++++ 
b/sysdeps/unix/sysv/linux/nios2/arch-syscall.h +@@ -250,6 +250,7 @@ + #define __NR_sendmsg 211 + #define __NR_sendto 206 + #define __NR_set_mempolicy 237 ++#define __NR_set_mempolicy_home_node 450 + #define __NR_set_robust_list 99 + #define __NR_set_tid_address 96 + #define __NR_setdomainname 162 +diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc32/arch-syscall.h b/sysdeps/unix/sysv/linux/powerpc/powerpc32/arch-syscall.h +index 497299fbc47a708c..627831ebae1b9e90 100644 +--- a/sysdeps/unix/sysv/linux/powerpc/powerpc32/arch-syscall.h ++++ b/sysdeps/unix/sysv/linux/powerpc/powerpc32/arch-syscall.h +@@ -319,6 +319,7 @@ + #define __NR_sendmsg 341 + #define __NR_sendto 335 + #define __NR_set_mempolicy 261 ++#define __NR_set_mempolicy_home_node 450 + #define __NR_set_robust_list 300 + #define __NR_set_tid_address 232 + #define __NR_setdomainname 121 +diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc64/arch-syscall.h b/sysdeps/unix/sysv/linux/powerpc/powerpc64/arch-syscall.h +index e840279f171b10b9..bae597199d79eaad 100644 +--- a/sysdeps/unix/sysv/linux/powerpc/powerpc64/arch-syscall.h ++++ b/sysdeps/unix/sysv/linux/powerpc/powerpc64/arch-syscall.h +@@ -298,6 +298,7 @@ + #define __NR_sendmsg 341 + #define __NR_sendto 335 + #define __NR_set_mempolicy 261 ++#define __NR_set_mempolicy_home_node 450 + #define __NR_set_robust_list 300 + #define __NR_set_tid_address 232 + #define __NR_setdomainname 121 +diff --git a/sysdeps/unix/sysv/linux/riscv/rv32/arch-syscall.h b/sysdeps/unix/sysv/linux/riscv/rv32/arch-syscall.h +index 73ef74c005e5a2bb..bf4be80f8d380963 100644 +--- a/sysdeps/unix/sysv/linux/riscv/rv32/arch-syscall.h ++++ b/sysdeps/unix/sysv/linux/riscv/rv32/arch-syscall.h +@@ -228,6 +228,7 @@ + #define __NR_sendmsg 211 + #define __NR_sendto 206 + #define __NR_set_mempolicy 237 ++#define __NR_set_mempolicy_home_node 450 + #define __NR_set_robust_list 99 + #define __NR_set_tid_address 96 + #define __NR_setdomainname 162 +diff --git a/sysdeps/unix/sysv/linux/riscv/rv64/arch-syscall.h b/sysdeps/unix/sysv/linux/riscv/rv64/arch-syscall.h +index 919a79ee91177459..d656aedcc2be6009 100644 +--- a/sysdeps/unix/sysv/linux/riscv/rv64/arch-syscall.h ++++ b/sysdeps/unix/sysv/linux/riscv/rv64/arch-syscall.h +@@ -235,6 +235,7 @@ + #define __NR_sendmsg 211 + #define __NR_sendto 206 + #define __NR_set_mempolicy 237 ++#define __NR_set_mempolicy_home_node 450 + #define __NR_set_robust_list 99 + #define __NR_set_tid_address 96 + #define __NR_setdomainname 162 +diff --git a/sysdeps/unix/sysv/linux/s390/s390-32/arch-syscall.h b/sysdeps/unix/sysv/linux/s390/s390-32/arch-syscall.h +index 005c0ada7aab85a1..57025107e82c9439 100644 +--- a/sysdeps/unix/sysv/linux/s390/s390-32/arch-syscall.h ++++ b/sysdeps/unix/sysv/linux/s390/s390-32/arch-syscall.h +@@ -311,6 +311,7 @@ + #define __NR_sendmsg 370 + #define __NR_sendto 369 + #define __NR_set_mempolicy 270 ++#define __NR_set_mempolicy_home_node 450 + #define __NR_set_robust_list 304 + #define __NR_set_tid_address 252 + #define __NR_setdomainname 121 +diff --git a/sysdeps/unix/sysv/linux/s390/s390-64/arch-syscall.h b/sysdeps/unix/sysv/linux/s390/s390-64/arch-syscall.h +index 9131fddcc16116e4..72e19c6d569fbf9b 100644 +--- a/sysdeps/unix/sysv/linux/s390/s390-64/arch-syscall.h ++++ b/sysdeps/unix/sysv/linux/s390/s390-64/arch-syscall.h +@@ -278,6 +278,7 @@ + #define __NR_sendmsg 370 + #define __NR_sendto 369 + #define __NR_set_mempolicy 270 ++#define __NR_set_mempolicy_home_node 450 + #define __NR_set_robust_list 304 + #define __NR_set_tid_address 252 + #define __NR_setdomainname 
121 +diff --git a/sysdeps/unix/sysv/linux/sh/arch-syscall.h b/sysdeps/unix/sysv/linux/sh/arch-syscall.h +index d8fb041568ecb4da..d52b522d9cac87ef 100644 +--- a/sysdeps/unix/sysv/linux/sh/arch-syscall.h ++++ b/sysdeps/unix/sysv/linux/sh/arch-syscall.h +@@ -303,6 +303,7 @@ + #define __NR_sendmsg 355 + #define __NR_sendto 349 + #define __NR_set_mempolicy 276 ++#define __NR_set_mempolicy_home_node 450 + #define __NR_set_robust_list 311 + #define __NR_set_tid_address 258 + #define __NR_setdomainname 121 +diff --git a/sysdeps/unix/sysv/linux/sparc/sparc32/arch-syscall.h b/sysdeps/unix/sysv/linux/sparc/sparc32/arch-syscall.h +index 2bc014fe6a1a1f4a..d3f4d8aa3edb4795 100644 +--- a/sysdeps/unix/sysv/linux/sparc/sparc32/arch-syscall.h ++++ b/sysdeps/unix/sysv/linux/sparc/sparc32/arch-syscall.h +@@ -310,6 +310,7 @@ + #define __NR_sendmsg 114 + #define __NR_sendto 133 + #define __NR_set_mempolicy 305 ++#define __NR_set_mempolicy_home_node 450 + #define __NR_set_robust_list 300 + #define __NR_set_tid_address 166 + #define __NR_setdomainname 163 +diff --git a/sysdeps/unix/sysv/linux/sparc/sparc64/arch-syscall.h b/sysdeps/unix/sysv/linux/sparc/sparc64/arch-syscall.h +index 76dbbe595ffe868f..2cc03d7a24453335 100644 +--- a/sysdeps/unix/sysv/linux/sparc/sparc64/arch-syscall.h ++++ b/sysdeps/unix/sysv/linux/sparc/sparc64/arch-syscall.h +@@ -286,6 +286,7 @@ + #define __NR_sendmsg 114 + #define __NR_sendto 133 + #define __NR_set_mempolicy 305 ++#define __NR_set_mempolicy_home_node 450 + #define __NR_set_robust_list 300 + #define __NR_set_tid_address 166 + #define __NR_setdomainname 163 +diff --git a/sysdeps/unix/sysv/linux/syscall-names.list b/sysdeps/unix/sysv/linux/syscall-names.list +index 0bc2af37dfa1eeb5..e2743c649586d97a 100644 +--- a/sysdeps/unix/sysv/linux/syscall-names.list ++++ b/sysdeps/unix/sysv/linux/syscall-names.list +@@ -21,8 +21,8 @@ + # This file can list all potential system calls. The names are only + # used if the installed kernel headers also provide them. + +-# The list of system calls is current as of Linux 5.16. +-kernel 5.16 ++# The list of system calls is current as of Linux 5.17. 
++kernel 5.17 + + FAST_atomic_update + FAST_cmpxchg +@@ -523,6 +523,7 @@ sendmmsg + sendmsg + sendto + set_mempolicy ++set_mempolicy_home_node + set_robust_list + set_thread_area + set_tid_address +diff --git a/sysdeps/unix/sysv/linux/x86_64/64/arch-syscall.h b/sysdeps/unix/sysv/linux/x86_64/64/arch-syscall.h +index 28558279b48a1ef4..b4ab892ec183e32d 100644 +--- a/sysdeps/unix/sysv/linux/x86_64/64/arch-syscall.h ++++ b/sysdeps/unix/sysv/linux/x86_64/64/arch-syscall.h +@@ -278,6 +278,7 @@ + #define __NR_sendmsg 46 + #define __NR_sendto 44 + #define __NR_set_mempolicy 238 ++#define __NR_set_mempolicy_home_node 450 + #define __NR_set_robust_list 273 + #define __NR_set_thread_area 205 + #define __NR_set_tid_address 218 +diff --git a/sysdeps/unix/sysv/linux/x86_64/x32/arch-syscall.h b/sysdeps/unix/sysv/linux/x86_64/x32/arch-syscall.h +index c1ab8ec45e8b8fd3..772559c87b3625b8 100644 +--- a/sysdeps/unix/sysv/linux/x86_64/x32/arch-syscall.h ++++ b/sysdeps/unix/sysv/linux/x86_64/x32/arch-syscall.h +@@ -270,6 +270,7 @@ + #define __NR_sendmsg 1073742342 + #define __NR_sendto 1073741868 + #define __NR_set_mempolicy 1073742062 ++#define __NR_set_mempolicy_home_node 1073742274 + #define __NR_set_robust_list 1073742354 + #define __NR_set_thread_area 1073742029 + #define __NR_set_tid_address 1073742042 diff --git a/SOURCES/glibc-upstream-2.34-195.patch b/SOURCES/glibc-upstream-2.34-195.patch new file mode 100644 index 0000000..d2b7afb --- /dev/null +++ b/SOURCES/glibc-upstream-2.34-195.patch @@ -0,0 +1,27 @@ +commit 81181ba5d916fc49bd737f603e28a3c2dc8430b4 +Author: Joseph Myers +Date: Wed Feb 16 14:19:24 2022 +0000 + + Update kernel version to 5.16 in tst-mman-consts.py + + This patch updates the kernel version in the test tst-mman-consts.py + to 5.16. (There are no new MAP_* constants covered by this test in + 5.16 that need any other header changes.) + + Tested with build-many-glibcs.py. + + (cherry picked from commit 790a607e234aa10d4b977a1b80aebe8a2acac970) + +diff --git a/sysdeps/unix/sysv/linux/tst-mman-consts.py b/sysdeps/unix/sysv/linux/tst-mman-consts.py +index eeccdfd04dae57ab..8102d80b6660e523 100644 +--- a/sysdeps/unix/sysv/linux/tst-mman-consts.py ++++ b/sysdeps/unix/sysv/linux/tst-mman-consts.py +@@ -33,7 +33,7 @@ def main(): + help='C compiler (including options) to use') + args = parser.parse_args() + linux_version_headers = glibcsyscalls.linux_kernel_version(args.cc) +- linux_version_glibc = (5, 15) ++ linux_version_glibc = (5, 16) + sys.exit(glibcextract.compare_macro_consts( + '#define _GNU_SOURCE 1\n' + '#include \n', diff --git a/SOURCES/glibc-upstream-2.34-196.patch b/SOURCES/glibc-upstream-2.34-196.patch new file mode 100644 index 0000000..5294eea --- /dev/null +++ b/SOURCES/glibc-upstream-2.34-196.patch @@ -0,0 +1,27 @@ +commit 0499c3a95fb864284fef36d3e9c5a54f6646b2db +Author: Joseph Myers +Date: Thu Mar 24 15:35:27 2022 +0000 + + Update kernel version to 5.17 in tst-mman-consts.py + + This patch updates the kernel version in the test tst-mman-consts.py + to 5.17. (There are no new MAP_* constants covered by this test in + 5.17 that need any other header changes.) + + Tested with build-many-glibcs.py. 
+ + (cherry picked from commit 23808a422e6036accaba7236fd3b9a0d7ab7e8ee) + +diff --git a/sysdeps/unix/sysv/linux/tst-mman-consts.py b/sysdeps/unix/sysv/linux/tst-mman-consts.py +index 8102d80b6660e523..724c7375c3a1623b 100644 +--- a/sysdeps/unix/sysv/linux/tst-mman-consts.py ++++ b/sysdeps/unix/sysv/linux/tst-mman-consts.py +@@ -33,7 +33,7 @@ def main(): + help='C compiler (including options) to use') + args = parser.parse_args() + linux_version_headers = glibcsyscalls.linux_kernel_version(args.cc) +- linux_version_glibc = (5, 16) ++ linux_version_glibc = (5, 17) + sys.exit(glibcextract.compare_macro_consts( + '#define _GNU_SOURCE 1\n' + '#include \n', diff --git a/SOURCES/glibc-upstream-2.34-197.patch b/SOURCES/glibc-upstream-2.34-197.patch new file mode 100644 index 0000000..afe47ec --- /dev/null +++ b/SOURCES/glibc-upstream-2.34-197.patch @@ -0,0 +1,26 @@ +commit f858bc309315a03ff6b1a048f59405c159d23430 +Author: Joseph Myers +Date: Mon Feb 21 22:49:36 2022 +0000 + + Add SOL_MPTCP, SOL_MCTP from Linux 5.16 to bits/socket.h + + Linux 5.16 adds constants SOL_MPTCP and SOL_MCTP to the getsockopt / + setsockopt levels; add these constants to bits/socket.h. + + Tested for x86_64. + + (cherry picked from commit fdc1ae67fef27eea1445bab4bdfe2f0fb3bc7aa1) + +diff --git a/sysdeps/unix/sysv/linux/bits/socket.h b/sysdeps/unix/sysv/linux/bits/socket.h +index 7bb9e863d7329da9..c81fab840918924e 100644 +--- a/sysdeps/unix/sysv/linux/bits/socket.h ++++ b/sysdeps/unix/sysv/linux/bits/socket.h +@@ -169,6 +169,8 @@ typedef __socklen_t socklen_t; + #define SOL_KCM 281 + #define SOL_TLS 282 + #define SOL_XDP 283 ++#define SOL_MPTCP 284 ++#define SOL_MCTP 285 + + /* Maximum queue length specifiable by listen. */ + #define SOMAXCONN 4096 diff --git a/SOURCES/glibc-upstream-2.34-198.patch b/SOURCES/glibc-upstream-2.34-198.patch new file mode 100644 index 0000000..67ab10c --- /dev/null +++ b/SOURCES/glibc-upstream-2.34-198.patch @@ -0,0 +1,21 @@ +commit c108e87026d61d6744e3e55704e0bea937243f5a +Author: Szabolcs Nagy +Date: Tue Dec 14 11:15:07 2021 +0000 + + aarch64: Add HWCAP2_ECV from Linux 5.16 + + Indicates the availability of enhanced counter virtualization extension + of armv8.6-a with self-synchronized virtual counter CNTVCTSS_EL0 usable + in userspace. + + (cherry picked from commit 5a1be8ebdf6f02d4efec6e5f12ad06db17511f90) + +diff --git a/sysdeps/unix/sysv/linux/aarch64/bits/hwcap.h b/sysdeps/unix/sysv/linux/aarch64/bits/hwcap.h +index 30fda0a4a347695e..04cc762015a7230a 100644 +--- a/sysdeps/unix/sysv/linux/aarch64/bits/hwcap.h ++++ b/sysdeps/unix/sysv/linux/aarch64/bits/hwcap.h +@@ -74,3 +74,4 @@ + #define HWCAP2_RNG (1 << 16) + #define HWCAP2_BTI (1 << 17) + #define HWCAP2_MTE (1 << 18) ++#define HWCAP2_ECV (1 << 19) diff --git a/SOURCES/glibc-upstream-2.34-199.patch b/SOURCES/glibc-upstream-2.34-199.patch new file mode 100644 index 0000000..02675fc --- /dev/null +++ b/SOURCES/glibc-upstream-2.34-199.patch @@ -0,0 +1,21 @@ +commit 97cb8227b864b8ea0d99a4a50e4163baad3e1c72 +Author: Joseph Myers +Date: Mon Mar 28 13:16:48 2022 +0000 + + Add HWCAP2_AFP, HWCAP2_RPRES from Linux 5.17 to AArch64 bits/hwcap.h + + Add the new HWCAP2_AFP and HWCAP2_RPRES constants from Linux 5.17. + Tested with build-many-glibcs.py for aarch64-linux-gnu. 
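 + Programs consume these bits through getauxval; a minimal illustrative
 + check (a sketch assuming headers with the two macros added below, not
 + part of the patch itself):

    #include <sys/auxv.h>   /* getauxval, AT_HWCAP2; pulls in bits/hwcap.h */
    #include <stdio.h>

    int
    main (void)
    {
      unsigned long hwcap2 = getauxval (AT_HWCAP2);
      /* FEAT_AFP: alternate floating-point behaviour controls.  */
      printf ("AFP: %s\n", (hwcap2 & HWCAP2_AFP) ? "yes" : "no");
      /* FEAT_RPRES: increased-precision reciprocal estimates.  */
      printf ("RPRES: %s\n", (hwcap2 & HWCAP2_RPRES) ? "yes" : "no");
      return 0;
    }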
+ + (cherry picked from commit 866c599182e87f116440b5d854f9e99533c48eb3) + +diff --git a/sysdeps/unix/sysv/linux/aarch64/bits/hwcap.h b/sysdeps/unix/sysv/linux/aarch64/bits/hwcap.h +index 04cc762015a7230a..9a5c4116b3fe9903 100644 +--- a/sysdeps/unix/sysv/linux/aarch64/bits/hwcap.h ++++ b/sysdeps/unix/sysv/linux/aarch64/bits/hwcap.h +@@ -75,3 +75,5 @@ + #define HWCAP2_BTI (1 << 17) + #define HWCAP2_MTE (1 << 18) + #define HWCAP2_ECV (1 << 19) ++#define HWCAP2_AFP (1 << 20) ++#define HWCAP2_RPRES (1 << 21) diff --git a/SOURCES/glibc-upstream-2.34-200.patch b/SOURCES/glibc-upstream-2.34-200.patch new file mode 100644 index 0000000..7ad14c9 --- /dev/null +++ b/SOURCES/glibc-upstream-2.34-200.patch @@ -0,0 +1,29 @@ +commit 31af92b9c8cf753992d45c801a855a02060afc08 +Author: Siddhesh Poyarekar +Date: Wed May 4 15:56:47 2022 +0530 + + manual: Clarify that abbreviations of long options are allowed + + The man page and code comments clearly state that abbreviations of long + option names are recognized correctly as long as they are unique. + Document this fact in the glibc manual as well. + + Signed-off-by: Siddhesh Poyarekar + Reviewed-by: Florian Weimer + Reviewed-by: Andreas Schwab + (cherry picked from commit db1efe02c9f15affc3908d6ae73875b82898a489) + +diff --git a/manual/getopt.texi b/manual/getopt.texi +index 5485fc46946631f7..b4c0b15ac2060560 100644 +--- a/manual/getopt.texi ++++ b/manual/getopt.texi +@@ -250,7 +250,8 @@ option, and stores the option's argument (if it has one) in @code{optarg}. + + When @code{getopt_long} encounters a long option, it takes actions based + on the @code{flag} and @code{val} fields of the definition of that +-option. ++option. The option name may be abbreviated as long as the abbreviation is ++unique. + + If @code{flag} is a null pointer, then @code{getopt_long} returns the + contents of @code{val} to indicate which option it found. You should diff --git a/SOURCES/glibc-upstream-2.34-201.patch b/SOURCES/glibc-upstream-2.34-201.patch new file mode 100644 index 0000000..68ca969 --- /dev/null +++ b/SOURCES/glibc-upstream-2.34-201.patch @@ -0,0 +1,1789 @@ +commit 0d5b36c8cc15f064e302d29692853f8a760e1547 +Author: Noah Goldstein +Date: Mon Jan 10 15:35:38 2022 -0600 + + x86: Optimize strcmp-avx2.S + + Optimization are primarily to the loop logic and how the page cross + logic interacts with the loop. + + The page cross logic is at times more expensive for short strings near + the end of a page but not crossing the page. This is done to retest + the page cross conditions with a non-faulty check and to improve the + logic for entering the loop afterwards. This is only particular cases, + however, and is general made up for by more than 10x improvements on + the transition from the page cross -> loop case. + + The non-page cross cases are improved most for smaller sizes [0, 128] + and go about even for (128, 4096]. The loop page cross logic is + improved so some more significant speedup is seen there as well. + + test-strcmp, test-strncmp, test-wcscmp, and test-wcsncmp all pass. + + Signed-off-by: Noah Goldstein + (cherry picked from commit b77b06e0e296f1a2276c27a67e1d44f2cfa38d45) + +diff --git a/sysdeps/x86_64/multiarch/strcmp-avx2.S b/sysdeps/x86_64/multiarch/strcmp-avx2.S +index fa70c994fc25dfd8..a0d1c65db11028bc 100644 +--- a/sysdeps/x86_64/multiarch/strcmp-avx2.S ++++ b/sysdeps/x86_64/multiarch/strcmp-avx2.S +@@ -26,35 +26,57 @@ + + # define PAGE_SIZE 4096 + +-/* VEC_SIZE = Number of bytes in a ymm register */ ++ /* VEC_SIZE = Number of bytes in a ymm register. 
*/ + # define VEC_SIZE 32 + +-/* Shift for dividing by (VEC_SIZE * 4). */ +-# define DIVIDE_BY_VEC_4_SHIFT 7 +-# if (VEC_SIZE * 4) != (1 << DIVIDE_BY_VEC_4_SHIFT) +-# error (VEC_SIZE * 4) != (1 << DIVIDE_BY_VEC_4_SHIFT) +-# endif ++# define VMOVU vmovdqu ++# define VMOVA vmovdqa + + # ifdef USE_AS_WCSCMP +-/* Compare packed dwords. */ ++ /* Compare packed dwords. */ + # define VPCMPEQ vpcmpeqd +-/* Compare packed dwords and store minimum. */ ++ /* Compare packed dwords and store minimum. */ + # define VPMINU vpminud +-/* 1 dword char == 4 bytes. */ ++ /* 1 dword char == 4 bytes. */ + # define SIZE_OF_CHAR 4 + # else +-/* Compare packed bytes. */ ++ /* Compare packed bytes. */ + # define VPCMPEQ vpcmpeqb +-/* Compare packed bytes and store minimum. */ ++ /* Compare packed bytes and store minimum. */ + # define VPMINU vpminub +-/* 1 byte char == 1 byte. */ ++ /* 1 byte char == 1 byte. */ + # define SIZE_OF_CHAR 1 + # endif + ++# ifdef USE_AS_STRNCMP ++# define LOOP_REG r9d ++# define LOOP_REG64 r9 ++ ++# define OFFSET_REG8 r9b ++# define OFFSET_REG r9d ++# define OFFSET_REG64 r9 ++# else ++# define LOOP_REG edx ++# define LOOP_REG64 rdx ++ ++# define OFFSET_REG8 dl ++# define OFFSET_REG edx ++# define OFFSET_REG64 rdx ++# endif ++ + # ifndef VZEROUPPER + # define VZEROUPPER vzeroupper + # endif + ++# if defined USE_AS_STRNCMP ++# define VEC_OFFSET 0 ++# else ++# define VEC_OFFSET (-VEC_SIZE) ++# endif ++ ++# define xmmZERO xmm15 ++# define ymmZERO ymm15 ++ + # ifndef SECTION + # define SECTION(p) p##.avx + # endif +@@ -79,783 +101,1049 @@ + the maximum offset is reached before a difference is found, zero is + returned. */ + +- .section SECTION(.text),"ax",@progbits +-ENTRY (STRCMP) ++ .section SECTION(.text), "ax", @progbits ++ENTRY(STRCMP) + # ifdef USE_AS_STRNCMP +- /* Check for simple cases (0 or 1) in offset. */ ++# ifdef __ILP32__ ++ /* Clear the upper 32 bits. */ ++ movl %edx, %rdx ++# endif + cmp $1, %RDX_LP +- je L(char0) +- jb L(zero) ++ /* Signed comparison intentional. We use this branch to also ++ test cases where length >= 2^63. These very large sizes can be ++ handled with strcmp as there is no way for that length to ++ actually bound the buffer. */ ++ jle L(one_or_less) + # ifdef USE_AS_WCSCMP +-# ifndef __ILP32__ + movq %rdx, %rcx +- /* Check if length could overflow when multiplied by +- sizeof(wchar_t). Checking top 8 bits will cover all potential +- overflow cases as well as redirect cases where its impossible to +- length to bound a valid memory region. In these cases just use +- 'wcscmp'. */ ++ ++ /* Multiplying length by sizeof(wchar_t) can result in overflow. ++ Check if that is possible. All cases where overflow are possible ++ are cases where length is large enough that it can never be a ++ bound on valid memory so just use wcscmp. */ + shrq $56, %rcx +- jnz OVERFLOW_STRCMP +-# endif +- /* Convert units: from wide to byte char. */ +- shl $2, %RDX_LP ++ jnz __wcscmp_avx2 ++ ++ leaq (, %rdx, 4), %rdx + # endif +- /* Register %r11 tracks the maximum offset. */ +- mov %RDX_LP, %R11_LP + # endif ++ vpxor %xmmZERO, %xmmZERO, %xmmZERO + movl %edi, %eax +- xorl %edx, %edx +- /* Make %xmm7 (%ymm7) all zeros in this function. */ +- vpxor %xmm7, %xmm7, %xmm7 + orl %esi, %eax +- andl $(PAGE_SIZE - 1), %eax +- cmpl $(PAGE_SIZE - (VEC_SIZE * 4)), %eax +- jg L(cross_page) +- /* Start comparing 4 vectors. 
*/ +- vmovdqu (%rdi), %ymm1 +- VPCMPEQ (%rsi), %ymm1, %ymm0 +- VPMINU %ymm1, %ymm0, %ymm0 +- VPCMPEQ %ymm7, %ymm0, %ymm0 +- vpmovmskb %ymm0, %ecx +- testl %ecx, %ecx +- je L(next_3_vectors) +- tzcntl %ecx, %edx ++ sall $20, %eax ++ /* Check if s1 or s2 may cross a page in next 4x VEC loads. */ ++ cmpl $((PAGE_SIZE -(VEC_SIZE * 4)) << 20), %eax ++ ja L(page_cross) ++ ++L(no_page_cross): ++ /* Safe to compare 4x vectors. */ ++ VMOVU (%rdi), %ymm0 ++ /* 1s where s1 and s2 equal. */ ++ VPCMPEQ (%rsi), %ymm0, %ymm1 ++ /* 1s at null CHAR. */ ++ VPCMPEQ %ymm0, %ymmZERO, %ymm2 ++ /* 1s where s1 and s2 equal AND not null CHAR. */ ++ vpandn %ymm1, %ymm2, %ymm1 ++ ++ /* All 1s -> keep going, any 0s -> return. */ ++ vpmovmskb %ymm1, %ecx + # ifdef USE_AS_STRNCMP +- /* Return 0 if the mismatched index (%rdx) is after the maximum +- offset (%r11). */ +- cmpq %r11, %rdx +- jae L(zero) ++ cmpq $VEC_SIZE, %rdx ++ jbe L(vec_0_test_len) + # endif ++ ++ /* All 1s represents all equals. incl will overflow to zero in ++ all equals case. Otherwise 1s will carry until position of first ++ mismatch. */ ++ incl %ecx ++ jz L(more_3x_vec) ++ ++ .p2align 4,, 4 ++L(return_vec_0): ++ tzcntl %ecx, %ecx + # ifdef USE_AS_WCSCMP ++ movl (%rdi, %rcx), %edx + xorl %eax, %eax +- movl (%rdi, %rdx), %ecx +- cmpl (%rsi, %rdx), %ecx +- je L(return) +-L(wcscmp_return): ++ cmpl (%rsi, %rcx), %edx ++ je L(ret0) + setl %al + negl %eax + orl $1, %eax +-L(return): + # else +- movzbl (%rdi, %rdx), %eax +- movzbl (%rsi, %rdx), %edx +- subl %edx, %eax ++ movzbl (%rdi, %rcx), %eax ++ movzbl (%rsi, %rcx), %ecx ++ subl %ecx, %eax + # endif ++L(ret0): + L(return_vzeroupper): + ZERO_UPPER_VEC_REGISTERS_RETURN + +- .p2align 4 +-L(return_vec_size): +- tzcntl %ecx, %edx + # ifdef USE_AS_STRNCMP +- /* Return 0 if the mismatched index (%rdx + VEC_SIZE) is after +- the maximum offset (%r11). */ +- addq $VEC_SIZE, %rdx +- cmpq %r11, %rdx +- jae L(zero) +-# ifdef USE_AS_WCSCMP ++ .p2align 4,, 8 ++L(vec_0_test_len): ++ notl %ecx ++ bzhil %edx, %ecx, %eax ++ jnz L(return_vec_0) ++ /* Align if will cross fetch block. */ ++ .p2align 4,, 2 ++L(ret_zero): + xorl %eax, %eax +- movl (%rdi, %rdx), %ecx +- cmpl (%rsi, %rdx), %ecx +- jne L(wcscmp_return) +-# else +- movzbl (%rdi, %rdx), %eax +- movzbl (%rsi, %rdx), %edx +- subl %edx, %eax +-# endif +-# else ++ VZEROUPPER_RETURN ++ ++ .p2align 4,, 5 ++L(one_or_less): ++ jb L(ret_zero) + # ifdef USE_AS_WCSCMP ++ /* 'nbe' covers the case where length is negative (large ++ unsigned). */ ++ jnbe __wcscmp_avx2 ++ movl (%rdi), %edx + xorl %eax, %eax +- movl VEC_SIZE(%rdi, %rdx), %ecx +- cmpl VEC_SIZE(%rsi, %rdx), %ecx +- jne L(wcscmp_return) ++ cmpl (%rsi), %edx ++ je L(ret1) ++ setl %al ++ negl %eax ++ orl $1, %eax + # else +- movzbl VEC_SIZE(%rdi, %rdx), %eax +- movzbl VEC_SIZE(%rsi, %rdx), %edx +- subl %edx, %eax ++ /* 'nbe' covers the case where length is negative (large ++ unsigned). */ ++ ++ jnbe __strcmp_avx2 ++ movzbl (%rdi), %eax ++ movzbl (%rsi), %ecx ++ subl %ecx, %eax + # endif ++L(ret1): ++ ret + # endif +- VZEROUPPER_RETURN + +- .p2align 4 +-L(return_2_vec_size): +- tzcntl %ecx, %edx ++ .p2align 4,, 10 ++L(return_vec_1): ++ tzcntl %ecx, %ecx + # ifdef USE_AS_STRNCMP +- /* Return 0 if the mismatched index (%rdx + 2 * VEC_SIZE) is +- after the maximum offset (%r11). */ +- addq $(VEC_SIZE * 2), %rdx +- cmpq %r11, %rdx +- jae L(zero) +-# ifdef USE_AS_WCSCMP ++ /* rdx must be > CHAR_PER_VEC so save to subtract w.o fear of ++ overflow. 
*/ ++ addq $-VEC_SIZE, %rdx ++ cmpq %rcx, %rdx ++ jbe L(ret_zero) ++# endif ++# ifdef USE_AS_WCSCMP ++ movl VEC_SIZE(%rdi, %rcx), %edx + xorl %eax, %eax +- movl (%rdi, %rdx), %ecx +- cmpl (%rsi, %rdx), %ecx +- jne L(wcscmp_return) +-# else +- movzbl (%rdi, %rdx), %eax +- movzbl (%rsi, %rdx), %edx +- subl %edx, %eax +-# endif ++ cmpl VEC_SIZE(%rsi, %rcx), %edx ++ je L(ret2) ++ setl %al ++ negl %eax ++ orl $1, %eax + # else +-# ifdef USE_AS_WCSCMP +- xorl %eax, %eax +- movl (VEC_SIZE * 2)(%rdi, %rdx), %ecx +- cmpl (VEC_SIZE * 2)(%rsi, %rdx), %ecx +- jne L(wcscmp_return) +-# else +- movzbl (VEC_SIZE * 2)(%rdi, %rdx), %eax +- movzbl (VEC_SIZE * 2)(%rsi, %rdx), %edx +- subl %edx, %eax +-# endif ++ movzbl VEC_SIZE(%rdi, %rcx), %eax ++ movzbl VEC_SIZE(%rsi, %rcx), %ecx ++ subl %ecx, %eax + # endif ++L(ret2): + VZEROUPPER_RETURN + +- .p2align 4 +-L(return_3_vec_size): +- tzcntl %ecx, %edx ++ .p2align 4,, 10 + # ifdef USE_AS_STRNCMP +- /* Return 0 if the mismatched index (%rdx + 3 * VEC_SIZE) is +- after the maximum offset (%r11). */ +- addq $(VEC_SIZE * 3), %rdx +- cmpq %r11, %rdx +- jae L(zero) +-# ifdef USE_AS_WCSCMP ++L(return_vec_3): ++ salq $32, %rcx ++# endif ++ ++L(return_vec_2): ++# ifndef USE_AS_STRNCMP ++ tzcntl %ecx, %ecx ++# else ++ tzcntq %rcx, %rcx ++ cmpq %rcx, %rdx ++ jbe L(ret_zero) ++# endif ++ ++# ifdef USE_AS_WCSCMP ++ movl (VEC_SIZE * 2)(%rdi, %rcx), %edx + xorl %eax, %eax +- movl (%rdi, %rdx), %ecx +- cmpl (%rsi, %rdx), %ecx +- jne L(wcscmp_return) +-# else +- movzbl (%rdi, %rdx), %eax +- movzbl (%rsi, %rdx), %edx +- subl %edx, %eax +-# endif ++ cmpl (VEC_SIZE * 2)(%rsi, %rcx), %edx ++ je L(ret3) ++ setl %al ++ negl %eax ++ orl $1, %eax + # else ++ movzbl (VEC_SIZE * 2)(%rdi, %rcx), %eax ++ movzbl (VEC_SIZE * 2)(%rsi, %rcx), %ecx ++ subl %ecx, %eax ++# endif ++L(ret3): ++ VZEROUPPER_RETURN ++ ++# ifndef USE_AS_STRNCMP ++ .p2align 4,, 10 ++L(return_vec_3): ++ tzcntl %ecx, %ecx + # ifdef USE_AS_WCSCMP ++ movl (VEC_SIZE * 3)(%rdi, %rcx), %edx + xorl %eax, %eax +- movl (VEC_SIZE * 3)(%rdi, %rdx), %ecx +- cmpl (VEC_SIZE * 3)(%rsi, %rdx), %ecx +- jne L(wcscmp_return) ++ cmpl (VEC_SIZE * 3)(%rsi, %rcx), %edx ++ je L(ret4) ++ setl %al ++ negl %eax ++ orl $1, %eax + # else +- movzbl (VEC_SIZE * 3)(%rdi, %rdx), %eax +- movzbl (VEC_SIZE * 3)(%rsi, %rdx), %edx +- subl %edx, %eax ++ movzbl (VEC_SIZE * 3)(%rdi, %rcx), %eax ++ movzbl (VEC_SIZE * 3)(%rsi, %rcx), %ecx ++ subl %ecx, %eax + # endif +-# endif ++L(ret4): + VZEROUPPER_RETURN ++# endif ++ ++ .p2align 4,, 10 ++L(more_3x_vec): ++ /* Safe to compare 4x vectors. 
*/ ++ VMOVU VEC_SIZE(%rdi), %ymm0 ++ VPCMPEQ VEC_SIZE(%rsi), %ymm0, %ymm1 ++ VPCMPEQ %ymm0, %ymmZERO, %ymm2 ++ vpandn %ymm1, %ymm2, %ymm1 ++ vpmovmskb %ymm1, %ecx ++ incl %ecx ++ jnz L(return_vec_1) ++ ++# ifdef USE_AS_STRNCMP ++ subq $(VEC_SIZE * 2), %rdx ++ jbe L(ret_zero) ++# endif ++ ++ VMOVU (VEC_SIZE * 2)(%rdi), %ymm0 ++ VPCMPEQ (VEC_SIZE * 2)(%rsi), %ymm0, %ymm1 ++ VPCMPEQ %ymm0, %ymmZERO, %ymm2 ++ vpandn %ymm1, %ymm2, %ymm1 ++ vpmovmskb %ymm1, %ecx ++ incl %ecx ++ jnz L(return_vec_2) ++ ++ VMOVU (VEC_SIZE * 3)(%rdi), %ymm0 ++ VPCMPEQ (VEC_SIZE * 3)(%rsi), %ymm0, %ymm1 ++ VPCMPEQ %ymm0, %ymmZERO, %ymm2 ++ vpandn %ymm1, %ymm2, %ymm1 ++ vpmovmskb %ymm1, %ecx ++ incl %ecx ++ jnz L(return_vec_3) + +- .p2align 4 +-L(next_3_vectors): +- vmovdqu VEC_SIZE(%rdi), %ymm6 +- VPCMPEQ VEC_SIZE(%rsi), %ymm6, %ymm3 +- VPMINU %ymm6, %ymm3, %ymm3 +- VPCMPEQ %ymm7, %ymm3, %ymm3 +- vpmovmskb %ymm3, %ecx +- testl %ecx, %ecx +- jne L(return_vec_size) +- vmovdqu (VEC_SIZE * 2)(%rdi), %ymm5 +- vmovdqu (VEC_SIZE * 3)(%rdi), %ymm4 +- vmovdqu (VEC_SIZE * 3)(%rsi), %ymm0 +- VPCMPEQ (VEC_SIZE * 2)(%rsi), %ymm5, %ymm2 +- VPMINU %ymm5, %ymm2, %ymm2 +- VPCMPEQ %ymm4, %ymm0, %ymm0 +- VPCMPEQ %ymm7, %ymm2, %ymm2 +- vpmovmskb %ymm2, %ecx +- testl %ecx, %ecx +- jne L(return_2_vec_size) +- VPMINU %ymm4, %ymm0, %ymm0 +- VPCMPEQ %ymm7, %ymm0, %ymm0 +- vpmovmskb %ymm0, %ecx +- testl %ecx, %ecx +- jne L(return_3_vec_size) +-L(main_loop_header): +- leaq (VEC_SIZE * 4)(%rdi), %rdx +- movl $PAGE_SIZE, %ecx +- /* Align load via RAX. */ +- andq $-(VEC_SIZE * 4), %rdx +- subq %rdi, %rdx +- leaq (%rdi, %rdx), %rax + # ifdef USE_AS_STRNCMP +- /* Starting from this point, the maximum offset, or simply the +- 'offset', DECREASES by the same amount when base pointers are +- moved forward. Return 0 when: +- 1) On match: offset <= the matched vector index. +- 2) On mistmach, offset is before the mistmatched index. ++ cmpq $(VEC_SIZE * 2), %rdx ++ jbe L(ret_zero) ++# endif ++ ++# ifdef USE_AS_WCSCMP ++ /* any non-zero positive value that doesn't inference with 0x1. + */ +- subq %rdx, %r11 +- jbe L(zero) +-# endif +- addq %rsi, %rdx +- movq %rdx, %rsi +- andl $(PAGE_SIZE - 1), %esi +- /* Number of bytes before page crossing. */ +- subq %rsi, %rcx +- /* Number of VEC_SIZE * 4 blocks before page crossing. */ +- shrq $DIVIDE_BY_VEC_4_SHIFT, %rcx +- /* ESI: Number of VEC_SIZE * 4 blocks before page crossing. */ +- movl %ecx, %esi +- jmp L(loop_start) ++ movl $2, %r8d + ++# else ++ xorl %r8d, %r8d ++# endif ++ ++ /* The prepare labels are various entry points from the page ++ cross logic. */ ++L(prepare_loop): ++ ++# ifdef USE_AS_STRNCMP ++ /* Store N + (VEC_SIZE * 4) and place check at the begining of ++ the loop. */ ++ leaq (VEC_SIZE * 2)(%rdi, %rdx), %rdx ++# endif ++L(prepare_loop_no_len): ++ ++ /* Align s1 and adjust s2 accordingly. */ ++ subq %rdi, %rsi ++ andq $-(VEC_SIZE * 4), %rdi ++ addq %rdi, %rsi ++ ++# ifdef USE_AS_STRNCMP ++ subq %rdi, %rdx ++# endif ++ ++L(prepare_loop_aligned): ++ /* eax stores distance from rsi to next page cross. These cases ++ need to be handled specially as the 4x loop could potentially ++ read memory past the length of s1 or s2 and across a page ++ boundary. */ ++ movl $-(VEC_SIZE * 4), %eax ++ subl %esi, %eax ++ andl $(PAGE_SIZE - 1), %eax ++ ++ /* Loop 4x comparisons at a time. */ + .p2align 4 + L(loop): ++ ++ /* End condition for strncmp. */ + # ifdef USE_AS_STRNCMP +- /* Base pointers are moved forward by 4 * VEC_SIZE. Decrease +- the maximum offset (%r11) by the same amount. 
*/ +- subq $(VEC_SIZE * 4), %r11 +- jbe L(zero) +-# endif +- addq $(VEC_SIZE * 4), %rax +- addq $(VEC_SIZE * 4), %rdx +-L(loop_start): +- testl %esi, %esi +- leal -1(%esi), %esi +- je L(loop_cross_page) +-L(back_to_loop): +- /* Main loop, comparing 4 vectors are a time. */ +- vmovdqa (%rax), %ymm0 +- vmovdqa VEC_SIZE(%rax), %ymm3 +- VPCMPEQ (%rdx), %ymm0, %ymm4 +- VPCMPEQ VEC_SIZE(%rdx), %ymm3, %ymm1 +- VPMINU %ymm0, %ymm4, %ymm4 +- VPMINU %ymm3, %ymm1, %ymm1 +- vmovdqa (VEC_SIZE * 2)(%rax), %ymm2 +- VPMINU %ymm1, %ymm4, %ymm0 +- vmovdqa (VEC_SIZE * 3)(%rax), %ymm3 +- VPCMPEQ (VEC_SIZE * 2)(%rdx), %ymm2, %ymm5 +- VPCMPEQ (VEC_SIZE * 3)(%rdx), %ymm3, %ymm6 +- VPMINU %ymm2, %ymm5, %ymm5 +- VPMINU %ymm3, %ymm6, %ymm6 +- VPMINU %ymm5, %ymm0, %ymm0 +- VPMINU %ymm6, %ymm0, %ymm0 +- VPCMPEQ %ymm7, %ymm0, %ymm0 +- +- /* Test each mask (32 bits) individually because for VEC_SIZE +- == 32 is not possible to OR the four masks and keep all bits +- in a 64-bit integer register, differing from SSE2 strcmp +- where ORing is possible. */ +- vpmovmskb %ymm0, %ecx ++ subq $(VEC_SIZE * 4), %rdx ++ jbe L(ret_zero) ++# endif ++ ++ subq $-(VEC_SIZE * 4), %rdi ++ subq $-(VEC_SIZE * 4), %rsi ++ ++ /* Check if rsi loads will cross a page boundary. */ ++ addl $-(VEC_SIZE * 4), %eax ++ jnb L(page_cross_during_loop) ++ ++ /* Loop entry after handling page cross during loop. */ ++L(loop_skip_page_cross_check): ++ VMOVA (VEC_SIZE * 0)(%rdi), %ymm0 ++ VMOVA (VEC_SIZE * 1)(%rdi), %ymm2 ++ VMOVA (VEC_SIZE * 2)(%rdi), %ymm4 ++ VMOVA (VEC_SIZE * 3)(%rdi), %ymm6 ++ ++ /* ymm1 all 1s where s1 and s2 equal. All 0s otherwise. */ ++ VPCMPEQ (VEC_SIZE * 0)(%rsi), %ymm0, %ymm1 ++ ++ VPCMPEQ (VEC_SIZE * 1)(%rsi), %ymm2, %ymm3 ++ VPCMPEQ (VEC_SIZE * 2)(%rsi), %ymm4, %ymm5 ++ VPCMPEQ (VEC_SIZE * 3)(%rsi), %ymm6, %ymm7 ++ ++ ++ /* If any mismatches or null CHAR then 0 CHAR, otherwise non- ++ zero. */ ++ vpand %ymm0, %ymm1, %ymm1 ++ ++ ++ vpand %ymm2, %ymm3, %ymm3 ++ vpand %ymm4, %ymm5, %ymm5 ++ vpand %ymm6, %ymm7, %ymm7 ++ ++ VPMINU %ymm1, %ymm3, %ymm3 ++ VPMINU %ymm5, %ymm7, %ymm7 ++ ++ /* Reduce all 0 CHARs for the 4x VEC into ymm7. */ ++ VPMINU %ymm3, %ymm7, %ymm7 ++ ++ /* If any 0 CHAR then done. */ ++ VPCMPEQ %ymm7, %ymmZERO, %ymm7 ++ vpmovmskb %ymm7, %LOOP_REG ++ testl %LOOP_REG, %LOOP_REG ++ jz L(loop) ++ ++ /* Find which VEC has the mismatch of end of string. */ ++ VPCMPEQ %ymm1, %ymmZERO, %ymm1 ++ vpmovmskb %ymm1, %ecx + testl %ecx, %ecx +- je L(loop) +- VPCMPEQ %ymm7, %ymm4, %ymm0 +- vpmovmskb %ymm0, %edi +- testl %edi, %edi +- je L(test_vec) +- tzcntl %edi, %ecx ++ jnz L(return_vec_0_end) ++ ++ ++ VPCMPEQ %ymm3, %ymmZERO, %ymm3 ++ vpmovmskb %ymm3, %ecx ++ testl %ecx, %ecx ++ jnz L(return_vec_1_end) ++ ++L(return_vec_2_3_end): + # ifdef USE_AS_STRNCMP +- cmpq %rcx, %r11 +- jbe L(zero) +-# ifdef USE_AS_WCSCMP +- movq %rax, %rsi ++ subq $(VEC_SIZE * 2), %rdx ++ jbe L(ret_zero_end) ++# endif ++ ++ VPCMPEQ %ymm5, %ymmZERO, %ymm5 ++ vpmovmskb %ymm5, %ecx ++ testl %ecx, %ecx ++ jnz L(return_vec_2_end) ++ ++ /* LOOP_REG contains matches for null/mismatch from the loop. If ++ VEC 0,1,and 2 all have no null and no mismatches then mismatch ++ must entirely be from VEC 3 which is fully represented by ++ LOOP_REG. 
*/ ++ tzcntl %LOOP_REG, %LOOP_REG ++ ++# ifdef USE_AS_STRNCMP ++ subl $-(VEC_SIZE), %LOOP_REG ++ cmpq %LOOP_REG64, %rdx ++ jbe L(ret_zero_end) ++# endif ++ ++# ifdef USE_AS_WCSCMP ++ movl (VEC_SIZE * 2 - VEC_OFFSET)(%rdi, %LOOP_REG64), %ecx + xorl %eax, %eax +- movl (%rsi, %rcx), %edi +- cmpl (%rdx, %rcx), %edi +- jne L(wcscmp_return) +-# else +- movzbl (%rax, %rcx), %eax +- movzbl (%rdx, %rcx), %edx +- subl %edx, %eax +-# endif ++ cmpl (VEC_SIZE * 2 - VEC_OFFSET)(%rsi, %LOOP_REG64), %ecx ++ je L(ret5) ++ setl %al ++ negl %eax ++ xorl %r8d, %eax + # else +-# ifdef USE_AS_WCSCMP +- movq %rax, %rsi +- xorl %eax, %eax +- movl (%rsi, %rcx), %edi +- cmpl (%rdx, %rcx), %edi +- jne L(wcscmp_return) +-# else +- movzbl (%rax, %rcx), %eax +- movzbl (%rdx, %rcx), %edx +- subl %edx, %eax +-# endif ++ movzbl (VEC_SIZE * 2 - VEC_OFFSET)(%rdi, %LOOP_REG64), %eax ++ movzbl (VEC_SIZE * 2 - VEC_OFFSET)(%rsi, %LOOP_REG64), %ecx ++ subl %ecx, %eax ++ xorl %r8d, %eax ++ subl %r8d, %eax + # endif ++L(ret5): + VZEROUPPER_RETURN + +- .p2align 4 +-L(test_vec): + # ifdef USE_AS_STRNCMP +- /* The first vector matched. Return 0 if the maximum offset +- (%r11) <= VEC_SIZE. */ +- cmpq $VEC_SIZE, %r11 +- jbe L(zero) ++ .p2align 4,, 2 ++L(ret_zero_end): ++ xorl %eax, %eax ++ VZEROUPPER_RETURN + # endif +- VPCMPEQ %ymm7, %ymm1, %ymm1 +- vpmovmskb %ymm1, %ecx +- testl %ecx, %ecx +- je L(test_2_vec) +- tzcntl %ecx, %edi ++ ++ ++ /* The L(return_vec_N_end) differ from L(return_vec_N) in that ++ they use the value of `r8` to negate the return value. This is ++ because the page cross logic can swap `rdi` and `rsi`. */ ++ .p2align 4,, 10 + # ifdef USE_AS_STRNCMP +- addq $VEC_SIZE, %rdi +- cmpq %rdi, %r11 +- jbe L(zero) +-# ifdef USE_AS_WCSCMP +- movq %rax, %rsi ++L(return_vec_1_end): ++ salq $32, %rcx ++# endif ++L(return_vec_0_end): ++# ifndef USE_AS_STRNCMP ++ tzcntl %ecx, %ecx ++# else ++ tzcntq %rcx, %rcx ++ cmpq %rcx, %rdx ++ jbe L(ret_zero_end) ++# endif ++ ++# ifdef USE_AS_WCSCMP ++ movl (%rdi, %rcx), %edx + xorl %eax, %eax +- movl (%rsi, %rdi), %ecx +- cmpl (%rdx, %rdi), %ecx +- jne L(wcscmp_return) +-# else +- movzbl (%rax, %rdi), %eax +- movzbl (%rdx, %rdi), %edx +- subl %edx, %eax +-# endif ++ cmpl (%rsi, %rcx), %edx ++ je L(ret6) ++ setl %al ++ negl %eax ++ xorl %r8d, %eax + # else ++ movzbl (%rdi, %rcx), %eax ++ movzbl (%rsi, %rcx), %ecx ++ subl %ecx, %eax ++ xorl %r8d, %eax ++ subl %r8d, %eax ++# endif ++L(ret6): ++ VZEROUPPER_RETURN ++ ++# ifndef USE_AS_STRNCMP ++ .p2align 4,, 10 ++L(return_vec_1_end): ++ tzcntl %ecx, %ecx + # ifdef USE_AS_WCSCMP +- movq %rax, %rsi ++ movl VEC_SIZE(%rdi, %rcx), %edx + xorl %eax, %eax +- movl VEC_SIZE(%rsi, %rdi), %ecx +- cmpl VEC_SIZE(%rdx, %rdi), %ecx +- jne L(wcscmp_return) ++ cmpl VEC_SIZE(%rsi, %rcx), %edx ++ je L(ret7) ++ setl %al ++ negl %eax ++ xorl %r8d, %eax + # else +- movzbl VEC_SIZE(%rax, %rdi), %eax +- movzbl VEC_SIZE(%rdx, %rdi), %edx +- subl %edx, %eax ++ movzbl VEC_SIZE(%rdi, %rcx), %eax ++ movzbl VEC_SIZE(%rsi, %rcx), %ecx ++ subl %ecx, %eax ++ xorl %r8d, %eax ++ subl %r8d, %eax + # endif +-# endif ++L(ret7): + VZEROUPPER_RETURN ++# endif + +- .p2align 4 +-L(test_2_vec): ++ .p2align 4,, 10 ++L(return_vec_2_end): ++ tzcntl %ecx, %ecx + # ifdef USE_AS_STRNCMP +- /* The first 2 vectors matched. Return 0 if the maximum offset +- (%r11) <= 2 * VEC_SIZE. 
*/ +- cmpq $(VEC_SIZE * 2), %r11 +- jbe L(zero) ++ cmpq %rcx, %rdx ++ jbe L(ret_zero_page_cross) + # endif +- VPCMPEQ %ymm7, %ymm5, %ymm5 +- vpmovmskb %ymm5, %ecx +- testl %ecx, %ecx +- je L(test_3_vec) +- tzcntl %ecx, %edi +-# ifdef USE_AS_STRNCMP +- addq $(VEC_SIZE * 2), %rdi +- cmpq %rdi, %r11 +- jbe L(zero) +-# ifdef USE_AS_WCSCMP +- movq %rax, %rsi ++# ifdef USE_AS_WCSCMP ++ movl (VEC_SIZE * 2)(%rdi, %rcx), %edx + xorl %eax, %eax +- movl (%rsi, %rdi), %ecx +- cmpl (%rdx, %rdi), %ecx +- jne L(wcscmp_return) +-# else +- movzbl (%rax, %rdi), %eax +- movzbl (%rdx, %rdi), %edx +- subl %edx, %eax +-# endif ++ cmpl (VEC_SIZE * 2)(%rsi, %rcx), %edx ++ je L(ret11) ++ setl %al ++ negl %eax ++ xorl %r8d, %eax + # else +-# ifdef USE_AS_WCSCMP +- movq %rax, %rsi +- xorl %eax, %eax +- movl (VEC_SIZE * 2)(%rsi, %rdi), %ecx +- cmpl (VEC_SIZE * 2)(%rdx, %rdi), %ecx +- jne L(wcscmp_return) +-# else +- movzbl (VEC_SIZE * 2)(%rax, %rdi), %eax +- movzbl (VEC_SIZE * 2)(%rdx, %rdi), %edx +- subl %edx, %eax +-# endif ++ movzbl (VEC_SIZE * 2)(%rdi, %rcx), %eax ++ movzbl (VEC_SIZE * 2)(%rsi, %rcx), %ecx ++ subl %ecx, %eax ++ xorl %r8d, %eax ++ subl %r8d, %eax + # endif ++L(ret11): + VZEROUPPER_RETURN + +- .p2align 4 +-L(test_3_vec): ++ ++ /* Page cross in rsi in next 4x VEC. */ ++ ++ /* TODO: Improve logic here. */ ++ .p2align 4,, 10 ++L(page_cross_during_loop): ++ /* eax contains [distance_from_page - (VEC_SIZE * 4)]. */ ++ ++ /* Optimistically rsi and rdi and both aligned inwhich case we ++ don't need any logic here. */ ++ cmpl $-(VEC_SIZE * 4), %eax ++ /* Don't adjust eax before jumping back to loop and we will ++ never hit page cross case again. */ ++ je L(loop_skip_page_cross_check) ++ ++ /* Check if we can safely load a VEC. */ ++ cmpl $-(VEC_SIZE * 3), %eax ++ jle L(less_1x_vec_till_page_cross) ++ ++ VMOVA (%rdi), %ymm0 ++ VPCMPEQ (%rsi), %ymm0, %ymm1 ++ VPCMPEQ %ymm0, %ymmZERO, %ymm2 ++ vpandn %ymm1, %ymm2, %ymm1 ++ vpmovmskb %ymm1, %ecx ++ incl %ecx ++ jnz L(return_vec_0_end) ++ ++ /* if distance >= 2x VEC then eax > -(VEC_SIZE * 2). */ ++ cmpl $-(VEC_SIZE * 2), %eax ++ jg L(more_2x_vec_till_page_cross) ++ ++ .p2align 4,, 4 ++L(less_1x_vec_till_page_cross): ++ subl $-(VEC_SIZE * 4), %eax ++ /* Guranteed safe to read from rdi - VEC_SIZE here. The only ++ concerning case is first iteration if incoming s1 was near start ++ of a page and s2 near end. If s1 was near the start of the page ++ we already aligned up to nearest VEC_SIZE * 4 so gurnateed safe ++ to read back -VEC_SIZE. If rdi is truly at the start of a page ++ here, it means the previous page (rdi - VEC_SIZE) has already ++ been loaded earlier so must be valid. */ ++ VMOVU -VEC_SIZE(%rdi, %rax), %ymm0 ++ VPCMPEQ -VEC_SIZE(%rsi, %rax), %ymm0, %ymm1 ++ VPCMPEQ %ymm0, %ymmZERO, %ymm2 ++ vpandn %ymm1, %ymm2, %ymm1 ++ vpmovmskb %ymm1, %ecx ++ ++ /* Mask of potentially valid bits. The lower bits can be out of ++ range comparisons (but safe regarding page crosses). */ ++ movl $-1, %r10d ++ shlxl %esi, %r10d, %r10d ++ notl %ecx ++ + # ifdef USE_AS_STRNCMP +- /* The first 3 vectors matched. Return 0 if the maximum offset +- (%r11) <= 3 * VEC_SIZE. 
*/ +- cmpq $(VEC_SIZE * 3), %r11 +- jbe L(zero) +-# endif +- VPCMPEQ %ymm7, %ymm6, %ymm6 +- vpmovmskb %ymm6, %esi +- tzcntl %esi, %ecx ++ cmpq %rax, %rdx ++ jbe L(return_page_cross_end_check) ++# endif ++ movl %eax, %OFFSET_REG ++ addl $(PAGE_SIZE - VEC_SIZE * 4), %eax ++ ++ andl %r10d, %ecx ++ jz L(loop_skip_page_cross_check) ++ ++ .p2align 4,, 3 ++L(return_page_cross_end): ++ tzcntl %ecx, %ecx ++ + # ifdef USE_AS_STRNCMP +- addq $(VEC_SIZE * 3), %rcx +- cmpq %rcx, %r11 +- jbe L(zero) +-# ifdef USE_AS_WCSCMP +- movq %rax, %rsi +- xorl %eax, %eax +- movl (%rsi, %rcx), %esi +- cmpl (%rdx, %rcx), %esi +- jne L(wcscmp_return) +-# else +- movzbl (%rax, %rcx), %eax +- movzbl (%rdx, %rcx), %edx +- subl %edx, %eax +-# endif ++ leal -VEC_SIZE(%OFFSET_REG64, %rcx), %ecx ++L(return_page_cross_cmp_mem): + # else +-# ifdef USE_AS_WCSCMP +- movq %rax, %rsi ++ addl %OFFSET_REG, %ecx ++# endif ++# ifdef USE_AS_WCSCMP ++ movl VEC_OFFSET(%rdi, %rcx), %edx + xorl %eax, %eax +- movl (VEC_SIZE * 3)(%rsi, %rcx), %esi +- cmpl (VEC_SIZE * 3)(%rdx, %rcx), %esi +- jne L(wcscmp_return) +-# else +- movzbl (VEC_SIZE * 3)(%rax, %rcx), %eax +- movzbl (VEC_SIZE * 3)(%rdx, %rcx), %edx +- subl %edx, %eax +-# endif ++ cmpl VEC_OFFSET(%rsi, %rcx), %edx ++ je L(ret8) ++ setl %al ++ negl %eax ++ xorl %r8d, %eax ++# else ++ movzbl VEC_OFFSET(%rdi, %rcx), %eax ++ movzbl VEC_OFFSET(%rsi, %rcx), %ecx ++ subl %ecx, %eax ++ xorl %r8d, %eax ++ subl %r8d, %eax + # endif ++L(ret8): + VZEROUPPER_RETURN + +- .p2align 4 +-L(loop_cross_page): +- xorl %r10d, %r10d +- movq %rdx, %rcx +- /* Align load via RDX. We load the extra ECX bytes which should +- be ignored. */ +- andl $((VEC_SIZE * 4) - 1), %ecx +- /* R10 is -RCX. */ +- subq %rcx, %r10 +- +- /* This works only if VEC_SIZE * 2 == 64. */ +-# if (VEC_SIZE * 2) != 64 +-# error (VEC_SIZE * 2) != 64 +-# endif +- +- /* Check if the first VEC_SIZE * 2 bytes should be ignored. */ +- cmpl $(VEC_SIZE * 2), %ecx +- jge L(loop_cross_page_2_vec) +- +- vmovdqu (%rax, %r10), %ymm2 +- vmovdqu VEC_SIZE(%rax, %r10), %ymm3 +- VPCMPEQ (%rdx, %r10), %ymm2, %ymm0 +- VPCMPEQ VEC_SIZE(%rdx, %r10), %ymm3, %ymm1 +- VPMINU %ymm2, %ymm0, %ymm0 +- VPMINU %ymm3, %ymm1, %ymm1 +- VPCMPEQ %ymm7, %ymm0, %ymm0 +- VPCMPEQ %ymm7, %ymm1, %ymm1 +- +- vpmovmskb %ymm0, %edi +- vpmovmskb %ymm1, %esi +- +- salq $32, %rsi +- xorq %rsi, %rdi +- +- /* Since ECX < VEC_SIZE * 2, simply skip the first ECX bytes. */ +- shrq %cl, %rdi +- +- testq %rdi, %rdi +- je L(loop_cross_page_2_vec) +- tzcntq %rdi, %rcx + # ifdef USE_AS_STRNCMP +- cmpq %rcx, %r11 +- jbe L(zero) +-# ifdef USE_AS_WCSCMP +- movq %rax, %rsi ++ .p2align 4,, 10 ++L(return_page_cross_end_check): ++ tzcntl %ecx, %ecx ++ leal -VEC_SIZE(%rax, %rcx), %ecx ++ cmpl %ecx, %edx ++ ja L(return_page_cross_cmp_mem) + xorl %eax, %eax +- movl (%rsi, %rcx), %edi +- cmpl (%rdx, %rcx), %edi +- jne L(wcscmp_return) +-# else +- movzbl (%rax, %rcx), %eax +- movzbl (%rdx, %rcx), %edx +- subl %edx, %eax +-# endif +-# else +-# ifdef USE_AS_WCSCMP +- movq %rax, %rsi +- xorl %eax, %eax +- movl (%rsi, %rcx), %edi +- cmpl (%rdx, %rcx), %edi +- jne L(wcscmp_return) +-# else +- movzbl (%rax, %rcx), %eax +- movzbl (%rdx, %rcx), %edx +- subl %edx, %eax +-# endif +-# endif + VZEROUPPER_RETURN ++# endif + +- .p2align 4 +-L(loop_cross_page_2_vec): +- /* The first VEC_SIZE * 2 bytes match or are ignored. 
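
(Illustrative aside, not part of the patch: the `incl %ecx; jnz` idiom right above, used after every vpmovmskb in this file, is an all-ones test by overflow. A C sketch with a made-up helper name:)

  /* Each movemask bit is 1 where the bytes matched and were not '\0', so
     the all-clear value is 0xffffffff; adding 1 wraps to 0 exactly then,
     and otherwise the carry stops at the first zero bit.  */
  #include <stdint.h>
  #include <assert.h>

  static int
  found_stop (uint32_t mask)
  {
    return mask + 1 != 0;       /* incl %ecx; jnz L(return_...) */
  }

  int
  main (void)
  {
    assert (!found_stop (0xffffffffu));  /* all 32 bytes fine */
    assert (found_stop (0xffffffefu));   /* mismatch or null at byte 4 */
    return 0;
  }
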
*/ +- vmovdqu (VEC_SIZE * 2)(%rax, %r10), %ymm2 +- vmovdqu (VEC_SIZE * 3)(%rax, %r10), %ymm3 +- VPCMPEQ (VEC_SIZE * 2)(%rdx, %r10), %ymm2, %ymm5 +- VPMINU %ymm2, %ymm5, %ymm5 +- VPCMPEQ (VEC_SIZE * 3)(%rdx, %r10), %ymm3, %ymm6 +- VPCMPEQ %ymm7, %ymm5, %ymm5 +- VPMINU %ymm3, %ymm6, %ymm6 +- VPCMPEQ %ymm7, %ymm6, %ymm6 +- +- vpmovmskb %ymm5, %edi +- vpmovmskb %ymm6, %esi +- +- salq $32, %rsi +- xorq %rsi, %rdi + +- xorl %r8d, %r8d +- /* If ECX > VEC_SIZE * 2, skip ECX - (VEC_SIZE * 2) bytes. */ +- subl $(VEC_SIZE * 2), %ecx +- jle 1f +- /* Skip ECX bytes. */ +- shrq %cl, %rdi +- /* R8 has number of bytes skipped. */ +- movl %ecx, %r8d +-1: +- /* Before jumping back to the loop, set ESI to the number of +- VEC_SIZE * 4 blocks before page crossing. */ +- movl $(PAGE_SIZE / (VEC_SIZE * 4) - 1), %esi +- +- testq %rdi, %rdi ++ .p2align 4,, 10 ++L(more_2x_vec_till_page_cross): ++ /* If more 2x vec till cross we will complete a full loop ++ iteration here. */ ++ ++ VMOVU VEC_SIZE(%rdi), %ymm0 ++ VPCMPEQ VEC_SIZE(%rsi), %ymm0, %ymm1 ++ VPCMPEQ %ymm0, %ymmZERO, %ymm2 ++ vpandn %ymm1, %ymm2, %ymm1 ++ vpmovmskb %ymm1, %ecx ++ incl %ecx ++ jnz L(return_vec_1_end) ++ + # ifdef USE_AS_STRNCMP +- /* At this point, if %rdi value is 0, it already tested +- VEC_SIZE*4+%r10 byte starting from %rax. This label +- checks whether strncmp maximum offset reached or not. */ +- je L(string_nbyte_offset_check) +-# else +- je L(back_to_loop) ++ cmpq $(VEC_SIZE * 2), %rdx ++ jbe L(ret_zero_in_loop_page_cross) + # endif +- tzcntq %rdi, %rcx +- addq %r10, %rcx +- /* Adjust for number of bytes skipped. */ +- addq %r8, %rcx ++ ++ subl $-(VEC_SIZE * 4), %eax ++ ++ /* Safe to include comparisons from lower bytes. */ ++ VMOVU -(VEC_SIZE * 2)(%rdi, %rax), %ymm0 ++ VPCMPEQ -(VEC_SIZE * 2)(%rsi, %rax), %ymm0, %ymm1 ++ VPCMPEQ %ymm0, %ymmZERO, %ymm2 ++ vpandn %ymm1, %ymm2, %ymm1 ++ vpmovmskb %ymm1, %ecx ++ incl %ecx ++ jnz L(return_vec_page_cross_0) ++ ++ VMOVU -(VEC_SIZE * 1)(%rdi, %rax), %ymm0 ++ VPCMPEQ -(VEC_SIZE * 1)(%rsi, %rax), %ymm0, %ymm1 ++ VPCMPEQ %ymm0, %ymmZERO, %ymm2 ++ vpandn %ymm1, %ymm2, %ymm1 ++ vpmovmskb %ymm1, %ecx ++ incl %ecx ++ jnz L(return_vec_page_cross_1) ++ + # ifdef USE_AS_STRNCMP +- addq $(VEC_SIZE * 2), %rcx +- subq %rcx, %r11 +- jbe L(zero) +-# ifdef USE_AS_WCSCMP +- movq %rax, %rsi ++ /* Must check length here as length might proclude reading next ++ page. */ ++ cmpq %rax, %rdx ++ jbe L(ret_zero_in_loop_page_cross) ++# endif ++ ++ /* Finish the loop. */ ++ VMOVA (VEC_SIZE * 2)(%rdi), %ymm4 ++ VMOVA (VEC_SIZE * 3)(%rdi), %ymm6 ++ ++ VPCMPEQ (VEC_SIZE * 2)(%rsi), %ymm4, %ymm5 ++ VPCMPEQ (VEC_SIZE * 3)(%rsi), %ymm6, %ymm7 ++ vpand %ymm4, %ymm5, %ymm5 ++ vpand %ymm6, %ymm7, %ymm7 ++ VPMINU %ymm5, %ymm7, %ymm7 ++ VPCMPEQ %ymm7, %ymmZERO, %ymm7 ++ vpmovmskb %ymm7, %LOOP_REG ++ testl %LOOP_REG, %LOOP_REG ++ jnz L(return_vec_2_3_end) ++ ++ /* Best for code size to include ucond-jmp here. Would be faster ++ if this case is hot to duplicate the L(return_vec_2_3_end) code ++ as fall-through and have jump back to loop on mismatch ++ comparison. 
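
(Illustrative aside, not part of the patch: a per-byte C model of the vpand/VPMINU sequence that finishes the loop just above; the helper name is made up.)

  /* (data & eqmask) keeps the original byte where the strings matched and
     yields 0 where they differed, so it is 0 exactly on a mismatch or a
     '\0'; taking the unsigned minimum of the two folded vectors then lets
     a single compare-with-zero cover both VEC 2 and VEC 3.  */
  #include <assert.h>

  static unsigned char
  folded_byte (unsigned char c1, unsigned char c2)
  {
    unsigned char eqmask = (c1 == c2) ? 0xff : 0x00;  /* vpcmpeqb */
    return c1 & eqmask;                               /* vpand */
  }

  int
  main (void)
  {
    assert (folded_byte ('a', 'a') != 0);    /* match: keep going */
    assert (folded_byte ('a', 'b') == 0);    /* mismatch: stop */
    assert (folded_byte ('\0', '\0') == 0);  /* null: stop */
    return 0;
  }
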
*/ ++ subq $-(VEC_SIZE * 4), %rdi ++ subq $-(VEC_SIZE * 4), %rsi ++ addl $(PAGE_SIZE - VEC_SIZE * 8), %eax ++# ifdef USE_AS_STRNCMP ++ subq $(VEC_SIZE * 4), %rdx ++ ja L(loop_skip_page_cross_check) ++L(ret_zero_in_loop_page_cross): + xorl %eax, %eax +- movl (%rsi, %rcx), %edi +- cmpl (%rdx, %rcx), %edi +- jne L(wcscmp_return) +-# else +- movzbl (%rax, %rcx), %eax +- movzbl (%rdx, %rcx), %edx +- subl %edx, %eax +-# endif ++ VZEROUPPER_RETURN + # else +-# ifdef USE_AS_WCSCMP +- movq %rax, %rsi +- xorl %eax, %eax +- movl (VEC_SIZE * 2)(%rsi, %rcx), %edi +- cmpl (VEC_SIZE * 2)(%rdx, %rcx), %edi +- jne L(wcscmp_return) +-# else +- movzbl (VEC_SIZE * 2)(%rax, %rcx), %eax +- movzbl (VEC_SIZE * 2)(%rdx, %rcx), %edx +- subl %edx, %eax +-# endif ++ jmp L(loop_skip_page_cross_check) + # endif +- VZEROUPPER_RETURN + ++ ++ .p2align 4,, 10 ++L(return_vec_page_cross_0): ++ addl $-VEC_SIZE, %eax ++L(return_vec_page_cross_1): ++ tzcntl %ecx, %ecx + # ifdef USE_AS_STRNCMP +-L(string_nbyte_offset_check): +- leaq (VEC_SIZE * 4)(%r10), %r10 +- cmpq %r10, %r11 +- jbe L(zero) +- jmp L(back_to_loop) ++ leal -VEC_SIZE(%rax, %rcx), %ecx ++ cmpq %rcx, %rdx ++ jbe L(ret_zero_in_loop_page_cross) ++# else ++ addl %eax, %ecx + # endif + +- .p2align 4 +-L(cross_page_loop): +- /* Check one byte/dword at a time. */ + # ifdef USE_AS_WCSCMP +- cmpl %ecx, %eax ++ movl VEC_OFFSET(%rdi, %rcx), %edx ++ xorl %eax, %eax ++ cmpl VEC_OFFSET(%rsi, %rcx), %edx ++ je L(ret9) ++ setl %al ++ negl %eax ++ xorl %r8d, %eax + # else ++ movzbl VEC_OFFSET(%rdi, %rcx), %eax ++ movzbl VEC_OFFSET(%rsi, %rcx), %ecx + subl %ecx, %eax ++ xorl %r8d, %eax ++ subl %r8d, %eax + # endif +- jne L(different) +- addl $SIZE_OF_CHAR, %edx +- cmpl $(VEC_SIZE * 4), %edx +- je L(main_loop_header) +-# ifdef USE_AS_STRNCMP +- cmpq %r11, %rdx +- jae L(zero) ++L(ret9): ++ VZEROUPPER_RETURN ++ ++ ++ .p2align 4,, 10 ++L(page_cross): ++# ifndef USE_AS_STRNCMP ++ /* If both are VEC aligned we don't need any special logic here. ++ Only valid for strcmp where stop condition is guranteed to be ++ reachable by just reading memory. */ ++ testl $((VEC_SIZE - 1) << 20), %eax ++ jz L(no_page_cross) + # endif ++ ++ movl %edi, %eax ++ movl %esi, %ecx ++ andl $(PAGE_SIZE - 1), %eax ++ andl $(PAGE_SIZE - 1), %ecx ++ ++ xorl %OFFSET_REG, %OFFSET_REG ++ ++ /* Check which is closer to page cross, s1 or s2. */ ++ cmpl %eax, %ecx ++ jg L(page_cross_s2) ++ ++ /* The previous page cross check has false positives. Check for ++ true positive as page cross logic is very expensive. */ ++ subl $(PAGE_SIZE - VEC_SIZE * 4), %eax ++ jbe L(no_page_cross) ++ ++ /* Set r8 to not interfere with normal return value (rdi and rsi ++ did not swap). */ + # ifdef USE_AS_WCSCMP +- movl (%rdi, %rdx), %eax +- movl (%rsi, %rdx), %ecx ++ /* any non-zero positive value that doesn't inference with 0x1. ++ */ ++ movl $2, %r8d + # else +- movzbl (%rdi, %rdx), %eax +- movzbl (%rsi, %rdx), %ecx ++ xorl %r8d, %r8d + # endif +- /* Check null char. */ +- testl %eax, %eax +- jne L(cross_page_loop) +- /* Since %eax == 0, subtract is OK for both SIGNED and UNSIGNED +- comparisons. */ +- subl %ecx, %eax +-# ifndef USE_AS_WCSCMP +-L(different): ++ ++ /* Check if less than 1x VEC till page cross. */ ++ subl $(VEC_SIZE * 3), %eax ++ jg L(less_1x_vec_till_page) ++ ++ /* If more than 1x VEC till page cross, loop throuh safely ++ loadable memory until within 1x VEC of page cross. 
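
(Illustrative aside, not part of the patch: the recheck above filters out false positives from the cheap entry screen. A C model of the true-cross condition; names are made up.)

  #include <stdint.h>
  #include <assert.h>

  #define PAGE_SIZE 4096
  #define VEC_SIZE 32

  /* Matches `subl $(PAGE_SIZE - VEC_SIZE * 4), %eax; jbe L(no_page_cross)':
     the expensive path is kept only when the in-page offset leaves less
     than 4 vector loads before the page end.  */
  static int
  truly_crosses (uintptr_t p)
  {
    unsigned int off = p & (PAGE_SIZE - 1);
    return off > PAGE_SIZE - VEC_SIZE * 4;
  }

  int
  main (void)
  {
    assert (!truly_crosses (0x1000));  /* page start: 4x VEC fits */
    assert (truly_crosses (0x1fff));   /* last byte of the page */
    return 0;
  }
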
*/ ++ ++ .p2align 4,, 10 ++L(page_cross_loop): ++ ++ VMOVU (%rdi, %OFFSET_REG64), %ymm0 ++ VPCMPEQ (%rsi, %OFFSET_REG64), %ymm0, %ymm1 ++ VPCMPEQ %ymm0, %ymmZERO, %ymm2 ++ vpandn %ymm1, %ymm2, %ymm1 ++ vpmovmskb %ymm1, %ecx ++ incl %ecx ++ ++ jnz L(check_ret_vec_page_cross) ++ addl $VEC_SIZE, %OFFSET_REG ++# ifdef USE_AS_STRNCMP ++ cmpq %OFFSET_REG64, %rdx ++ jbe L(ret_zero_page_cross) + # endif +- VZEROUPPER_RETURN ++ addl $VEC_SIZE, %eax ++ jl L(page_cross_loop) ++ ++ subl %eax, %OFFSET_REG ++ /* OFFSET_REG has distance to page cross - VEC_SIZE. Guranteed ++ to not cross page so is safe to load. Since we have already ++ loaded at least 1 VEC from rsi it is also guranteed to be safe. ++ */ ++ ++ VMOVU (%rdi, %OFFSET_REG64), %ymm0 ++ VPCMPEQ (%rsi, %OFFSET_REG64), %ymm0, %ymm1 ++ VPCMPEQ %ymm0, %ymmZERO, %ymm2 ++ vpandn %ymm1, %ymm2, %ymm1 ++ vpmovmskb %ymm1, %ecx ++ ++# ifdef USE_AS_STRNCMP ++ leal VEC_SIZE(%OFFSET_REG64), %eax ++ cmpq %rax, %rdx ++ jbe L(check_ret_vec_page_cross2) ++ addq %rdi, %rdx ++# endif ++ incl %ecx ++ jz L(prepare_loop_no_len) + ++ .p2align 4,, 4 ++L(ret_vec_page_cross): ++# ifndef USE_AS_STRNCMP ++L(check_ret_vec_page_cross): ++# endif ++ tzcntl %ecx, %ecx ++ addl %OFFSET_REG, %ecx ++L(ret_vec_page_cross_cont): + # ifdef USE_AS_WCSCMP +- .p2align 4 +-L(different): +- /* Use movl to avoid modifying EFLAGS. */ +- movl $0, %eax ++ movl (%rdi, %rcx), %edx ++ xorl %eax, %eax ++ cmpl (%rsi, %rcx), %edx ++ je L(ret12) + setl %al + negl %eax +- orl $1, %eax +- VZEROUPPER_RETURN ++ xorl %r8d, %eax ++# else ++ movzbl (%rdi, %rcx), %eax ++ movzbl (%rsi, %rcx), %ecx ++ subl %ecx, %eax ++ xorl %r8d, %eax ++ subl %r8d, %eax + # endif ++L(ret12): ++ VZEROUPPER_RETURN + + # ifdef USE_AS_STRNCMP +- .p2align 4 +-L(zero): ++ .p2align 4,, 10 ++L(check_ret_vec_page_cross2): ++ incl %ecx ++L(check_ret_vec_page_cross): ++ tzcntl %ecx, %ecx ++ addl %OFFSET_REG, %ecx ++ cmpq %rcx, %rdx ++ ja L(ret_vec_page_cross_cont) ++ .p2align 4,, 2 ++L(ret_zero_page_cross): + xorl %eax, %eax + VZEROUPPER_RETURN ++# endif + +- .p2align 4 +-L(char0): +-# ifdef USE_AS_WCSCMP +- xorl %eax, %eax +- movl (%rdi), %ecx +- cmpl (%rsi), %ecx +- jne L(wcscmp_return) +-# else +- movzbl (%rsi), %ecx +- movzbl (%rdi), %eax +- subl %ecx, %eax +-# endif +- VZEROUPPER_RETURN ++ .p2align 4,, 4 ++L(page_cross_s2): ++ /* Ensure this is a true page cross. */ ++ subl $(PAGE_SIZE - VEC_SIZE * 4), %ecx ++ jbe L(no_page_cross) ++ ++ ++ movl %ecx, %eax ++ movq %rdi, %rcx ++ movq %rsi, %rdi ++ movq %rcx, %rsi ++ ++ /* set r8 to negate return value as rdi and rsi swapped. */ ++# ifdef USE_AS_WCSCMP ++ movl $-4, %r8d ++# else ++ movl $-1, %r8d + # endif ++ xorl %OFFSET_REG, %OFFSET_REG + +- .p2align 4 +-L(last_vector): +- addq %rdx, %rdi +- addq %rdx, %rsi ++ /* Check if more than 1x VEC till page cross. */ ++ subl $(VEC_SIZE * 3), %eax ++ jle L(page_cross_loop) ++ ++ .p2align 4,, 6 ++L(less_1x_vec_till_page): ++ /* Find largest load size we can use. */ ++ cmpl $16, %eax ++ ja L(less_16_till_page) ++ ++ VMOVU (%rdi), %xmm0 ++ VPCMPEQ (%rsi), %xmm0, %xmm1 ++ VPCMPEQ %xmm0, %xmmZERO, %xmm2 ++ vpandn %xmm1, %xmm2, %xmm1 ++ vpmovmskb %ymm1, %ecx ++ incw %cx ++ jnz L(check_ret_vec_page_cross) ++ movl $16, %OFFSET_REG + # ifdef USE_AS_STRNCMP +- subq %rdx, %r11 ++ cmpq %OFFSET_REG64, %rdx ++ jbe L(ret_zero_page_cross_slow_case0) ++ subl %eax, %OFFSET_REG ++# else ++ /* Explicit check for 16 byte alignment. 
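
(Illustrative aside, not part of the patch: the `cmpl $16, %eax; ja ...` check above and the $24/$28 checks in the following blocks pick the widest load that cannot fault before the page boundary. A C model of that selection; the name is made up.)

  #include <assert.h>

  static int
  probe_width (unsigned int dist_to_page_end)
  {
    if (dist_to_page_end >= 32) return 32;  /* full ymm */
    if (dist_to_page_end >= 16) return 16;  /* xmm */
    if (dist_to_page_end >= 8)  return 8;   /* vmovq */
    if (dist_to_page_end >= 4)  return 4;   /* vmovd */
    return 1;                               /* byte/dword loop */
  }

  int
  main (void)
  {
    assert (probe_width (20) == 16);
    assert (probe_width (10) == 8);
    assert (probe_width (3) == 1);
    return 0;
  }
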
*/ ++ subl %eax, %OFFSET_REG ++ jz L(prepare_loop) + # endif +- tzcntl %ecx, %edx ++ ++ VMOVU (%rdi, %OFFSET_REG64), %xmm0 ++ VPCMPEQ (%rsi, %OFFSET_REG64), %xmm0, %xmm1 ++ VPCMPEQ %xmm0, %xmmZERO, %xmm2 ++ vpandn %xmm1, %xmm2, %xmm1 ++ vpmovmskb %ymm1, %ecx ++ incw %cx ++ jnz L(check_ret_vec_page_cross) ++ + # ifdef USE_AS_STRNCMP +- cmpq %r11, %rdx +- jae L(zero) ++ addl $16, %OFFSET_REG ++ subq %OFFSET_REG64, %rdx ++ jbe L(ret_zero_page_cross_slow_case0) ++ subq $-(VEC_SIZE * 4), %rdx ++ ++ leaq -(VEC_SIZE * 4)(%rdi, %OFFSET_REG64), %rdi ++ leaq -(VEC_SIZE * 4)(%rsi, %OFFSET_REG64), %rsi ++# else ++ leaq (16 - VEC_SIZE * 4)(%rdi, %OFFSET_REG64), %rdi ++ leaq (16 - VEC_SIZE * 4)(%rsi, %OFFSET_REG64), %rsi + # endif +-# ifdef USE_AS_WCSCMP ++ jmp L(prepare_loop_aligned) ++ ++# ifdef USE_AS_STRNCMP ++ .p2align 4,, 2 ++L(ret_zero_page_cross_slow_case0): + xorl %eax, %eax +- movl (%rdi, %rdx), %ecx +- cmpl (%rsi, %rdx), %ecx +- jne L(wcscmp_return) +-# else +- movzbl (%rdi, %rdx), %eax +- movzbl (%rsi, %rdx), %edx +- subl %edx, %eax ++ ret + # endif +- VZEROUPPER_RETURN + +- /* Comparing on page boundary region requires special treatment: +- It must done one vector at the time, starting with the wider +- ymm vector if possible, if not, with xmm. If fetching 16 bytes +- (xmm) still passes the boundary, byte comparison must be done. +- */ +- .p2align 4 +-L(cross_page): +- /* Try one ymm vector at a time. */ +- cmpl $(PAGE_SIZE - VEC_SIZE), %eax +- jg L(cross_page_1_vector) +-L(loop_1_vector): +- vmovdqu (%rdi, %rdx), %ymm1 +- VPCMPEQ (%rsi, %rdx), %ymm1, %ymm0 +- VPMINU %ymm1, %ymm0, %ymm0 +- VPCMPEQ %ymm7, %ymm0, %ymm0 +- vpmovmskb %ymm0, %ecx +- testl %ecx, %ecx +- jne L(last_vector) + +- addl $VEC_SIZE, %edx ++ .p2align 4,, 10 ++L(less_16_till_page): ++ /* Find largest load size we can use. */ ++ cmpl $24, %eax ++ ja L(less_8_till_page) + +- addl $VEC_SIZE, %eax +-# ifdef USE_AS_STRNCMP +- /* Return 0 if the current offset (%rdx) >= the maximum offset +- (%r11). */ +- cmpq %r11, %rdx +- jae L(zero) +-# endif +- cmpl $(PAGE_SIZE - VEC_SIZE), %eax +- jle L(loop_1_vector) +-L(cross_page_1_vector): +- /* Less than 32 bytes to check, try one xmm vector. */ +- cmpl $(PAGE_SIZE - 16), %eax +- jg L(cross_page_1_xmm) +- vmovdqu (%rdi, %rdx), %xmm1 +- VPCMPEQ (%rsi, %rdx), %xmm1, %xmm0 +- VPMINU %xmm1, %xmm0, %xmm0 +- VPCMPEQ %xmm7, %xmm0, %xmm0 +- vpmovmskb %xmm0, %ecx +- testl %ecx, %ecx +- jne L(last_vector) ++ vmovq (%rdi), %xmm0 ++ vmovq (%rsi), %xmm1 ++ VPCMPEQ %xmm0, %xmmZERO, %xmm2 ++ VPCMPEQ %xmm1, %xmm0, %xmm1 ++ vpandn %xmm1, %xmm2, %xmm1 ++ vpmovmskb %ymm1, %ecx ++ incb %cl ++ jnz L(check_ret_vec_page_cross) + +- addl $16, %edx +-# ifndef USE_AS_WCSCMP +- addl $16, %eax ++ ++# ifdef USE_AS_STRNCMP ++ cmpq $8, %rdx ++ jbe L(ret_zero_page_cross_slow_case0) + # endif ++ movl $24, %OFFSET_REG ++ /* Explicit check for 16 byte alignment. */ ++ subl %eax, %OFFSET_REG ++ ++ ++ ++ vmovq (%rdi, %OFFSET_REG64), %xmm0 ++ vmovq (%rsi, %OFFSET_REG64), %xmm1 ++ VPCMPEQ %xmm0, %xmmZERO, %xmm2 ++ VPCMPEQ %xmm1, %xmm0, %xmm1 ++ vpandn %xmm1, %xmm2, %xmm1 ++ vpmovmskb %ymm1, %ecx ++ incb %cl ++ jnz L(check_ret_vec_page_cross) ++ + # ifdef USE_AS_STRNCMP +- /* Return 0 if the current offset (%rdx) >= the maximum offset +- (%r11). */ +- cmpq %r11, %rdx +- jae L(zero) +-# endif +- +-L(cross_page_1_xmm): +-# ifndef USE_AS_WCSCMP +- /* Less than 16 bytes to check, try 8 byte vector. NB: No need +- for wcscmp nor wcsncmp since wide char is 4 bytes. 
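
(Illustrative aside, not part of the patch: the `incw %cx` and `incb %cl` tests in these narrow-probe blocks are the same overflow trick at reduced width; the helper names are made up.)

  /* A 16-byte xmm compare populates only the low 16 movemask bits, and an
     8-byte vmovq load only the low 8 (the zeroed upper xmm bytes are
     treated as nulls by the vpandn), so the +1 wrap test is performed at
     the matching register width.  */
  #include <stdint.h>
  #include <assert.h>

  static int all_ok16 (uint32_t m) { return (uint16_t) (m + 1) == 0; }
  static int all_ok8 (uint32_t m) { return (uint8_t) (m + 1) == 0; }

  int
  main (void)
  {
    assert (all_ok16 (0xffff));
    assert (all_ok8 (0xff));
    assert (!all_ok8 (0x7f));   /* stop at byte 7 */
    return 0;
  }
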
*/ +- cmpl $(PAGE_SIZE - 8), %eax +- jg L(cross_page_8bytes) +- vmovq (%rdi, %rdx), %xmm1 +- vmovq (%rsi, %rdx), %xmm0 +- VPCMPEQ %xmm0, %xmm1, %xmm0 +- VPMINU %xmm1, %xmm0, %xmm0 +- VPCMPEQ %xmm7, %xmm0, %xmm0 +- vpmovmskb %xmm0, %ecx +- /* Only last 8 bits are valid. */ +- andl $0xff, %ecx +- testl %ecx, %ecx +- jne L(last_vector) ++ addl $8, %OFFSET_REG ++ subq %OFFSET_REG64, %rdx ++ jbe L(ret_zero_page_cross_slow_case0) ++ subq $-(VEC_SIZE * 4), %rdx + +- addl $8, %edx +- addl $8, %eax ++ leaq -(VEC_SIZE * 4)(%rdi, %OFFSET_REG64), %rdi ++ leaq -(VEC_SIZE * 4)(%rsi, %OFFSET_REG64), %rsi ++# else ++ leaq (8 - VEC_SIZE * 4)(%rdi, %OFFSET_REG64), %rdi ++ leaq (8 - VEC_SIZE * 4)(%rsi, %OFFSET_REG64), %rsi ++# endif ++ jmp L(prepare_loop_aligned) ++ ++ ++ .p2align 4,, 10 ++L(less_8_till_page): ++# ifdef USE_AS_WCSCMP ++ /* If using wchar then this is the only check before we reach ++ the page boundary. */ ++ movl (%rdi), %eax ++ movl (%rsi), %ecx ++ cmpl %ecx, %eax ++ jnz L(ret_less_8_wcs) + # ifdef USE_AS_STRNCMP +- /* Return 0 if the current offset (%rdx) >= the maximum offset +- (%r11). */ +- cmpq %r11, %rdx +- jae L(zero) ++ addq %rdi, %rdx ++ /* We already checked for len <= 1 so cannot hit that case here. ++ */ + # endif ++ testl %eax, %eax ++ jnz L(prepare_loop_no_len) ++ ret + +-L(cross_page_8bytes): +- /* Less than 8 bytes to check, try 4 byte vector. */ +- cmpl $(PAGE_SIZE - 4), %eax +- jg L(cross_page_4bytes) +- vmovd (%rdi, %rdx), %xmm1 +- vmovd (%rsi, %rdx), %xmm0 +- VPCMPEQ %xmm0, %xmm1, %xmm0 +- VPMINU %xmm1, %xmm0, %xmm0 +- VPCMPEQ %xmm7, %xmm0, %xmm0 +- vpmovmskb %xmm0, %ecx +- /* Only last 4 bits are valid. */ +- andl $0xf, %ecx +- testl %ecx, %ecx +- jne L(last_vector) ++ .p2align 4,, 8 ++L(ret_less_8_wcs): ++ setl %OFFSET_REG8 ++ negl %OFFSET_REG ++ movl %OFFSET_REG, %eax ++ xorl %r8d, %eax ++ ret ++ ++# else ++ ++ /* Find largest load size we can use. */ ++ cmpl $28, %eax ++ ja L(less_4_till_page) ++ ++ vmovd (%rdi), %xmm0 ++ vmovd (%rsi), %xmm1 ++ VPCMPEQ %xmm0, %xmmZERO, %xmm2 ++ VPCMPEQ %xmm1, %xmm0, %xmm1 ++ vpandn %xmm1, %xmm2, %xmm1 ++ vpmovmskb %ymm1, %ecx ++ subl $0xf, %ecx ++ jnz L(check_ret_vec_page_cross) + +- addl $4, %edx + # ifdef USE_AS_STRNCMP +- /* Return 0 if the current offset (%rdx) >= the maximum offset +- (%r11). */ +- cmpq %r11, %rdx +- jae L(zero) ++ cmpq $4, %rdx ++ jbe L(ret_zero_page_cross_slow_case1) + # endif ++ movl $28, %OFFSET_REG ++ /* Explicit check for 16 byte alignment. */ ++ subl %eax, %OFFSET_REG + +-L(cross_page_4bytes): +-# endif +- /* Less than 4 bytes to check, try one byte/dword at a time. 
*/ +-# ifdef USE_AS_STRNCMP +- cmpq %r11, %rdx +- jae L(zero) +-# endif +-# ifdef USE_AS_WCSCMP +- movl (%rdi, %rdx), %eax +- movl (%rsi, %rdx), %ecx +-# else +- movzbl (%rdi, %rdx), %eax +- movzbl (%rsi, %rdx), %ecx +-# endif +- testl %eax, %eax +- jne L(cross_page_loop) ++ ++ ++ vmovd (%rdi, %OFFSET_REG64), %xmm0 ++ vmovd (%rsi, %OFFSET_REG64), %xmm1 ++ VPCMPEQ %xmm0, %xmmZERO, %xmm2 ++ VPCMPEQ %xmm1, %xmm0, %xmm1 ++ vpandn %xmm1, %xmm2, %xmm1 ++ vpmovmskb %ymm1, %ecx ++ subl $0xf, %ecx ++ jnz L(check_ret_vec_page_cross) ++ ++# ifdef USE_AS_STRNCMP ++ addl $4, %OFFSET_REG ++ subq %OFFSET_REG64, %rdx ++ jbe L(ret_zero_page_cross_slow_case1) ++ subq $-(VEC_SIZE * 4), %rdx ++ ++ leaq -(VEC_SIZE * 4)(%rdi, %OFFSET_REG64), %rdi ++ leaq -(VEC_SIZE * 4)(%rsi, %OFFSET_REG64), %rsi ++# else ++ leaq (4 - VEC_SIZE * 4)(%rdi, %OFFSET_REG64), %rdi ++ leaq (4 - VEC_SIZE * 4)(%rsi, %OFFSET_REG64), %rsi ++# endif ++ jmp L(prepare_loop_aligned) ++ ++# ifdef USE_AS_STRNCMP ++ .p2align 4,, 2 ++L(ret_zero_page_cross_slow_case1): ++ xorl %eax, %eax ++ ret ++# endif ++ ++ .p2align 4,, 10 ++L(less_4_till_page): ++ subq %rdi, %rsi ++ /* Extremely slow byte comparison loop. */ ++L(less_4_loop): ++ movzbl (%rdi), %eax ++ movzbl (%rsi, %rdi), %ecx + subl %ecx, %eax +- VZEROUPPER_RETURN +-END (STRCMP) ++ jnz L(ret_less_4_loop) ++ testl %ecx, %ecx ++ jz L(ret_zero_4_loop) ++# ifdef USE_AS_STRNCMP ++ decq %rdx ++ jz L(ret_zero_4_loop) ++# endif ++ incq %rdi ++ /* end condition is reach page boundary (rdi is aligned). */ ++ testl $31, %edi ++ jnz L(less_4_loop) ++ leaq -(VEC_SIZE * 4)(%rdi, %rsi), %rsi ++ addq $-(VEC_SIZE * 4), %rdi ++# ifdef USE_AS_STRNCMP ++ subq $-(VEC_SIZE * 4), %rdx ++# endif ++ jmp L(prepare_loop_aligned) ++ ++L(ret_zero_4_loop): ++ xorl %eax, %eax ++ ret ++L(ret_less_4_loop): ++ xorl %r8d, %eax ++ subl %r8d, %eax ++ ret ++# endif ++END(STRCMP) + #endif diff --git a/SOURCES/glibc-upstream-2.34-202.patch b/SOURCES/glibc-upstream-2.34-202.patch new file mode 100644 index 0000000..9357b6f --- /dev/null +++ b/SOURCES/glibc-upstream-2.34-202.patch @@ -0,0 +1,1987 @@ +commit c41a66767d23b7f219fb943be6fab5ddf822d7da +Author: Noah Goldstein +Date: Mon Jan 10 15:35:39 2022 -0600 + + x86: Optimize strcmp-evex.S + + Optimization are primarily to the loop logic and how the page cross + logic interacts with the loop. + + The page cross logic is at times more expensive for short strings near + the end of a page but not crossing the page. This is done to retest + the page cross conditions with a non-faulty check and to improve the + logic for entering the loop afterwards. This is only particular cases, + however, and is general made up for by more than 10x improvements on + the transition from the page cross -> loop case. + + The non-page cross cases as well are nearly universally improved. + + test-strcmp, test-strncmp, test-wcscmp, and test-wcsncmp all pass. + + Signed-off-by: Noah Goldstein + (cherry picked from commit 8418eb3ff4b781d31c4ed5dc6c0bd7356bc45db9) + +diff --git a/sysdeps/x86_64/multiarch/strcmp-evex.S b/sysdeps/x86_64/multiarch/strcmp-evex.S +index 6f5c4bf984da2b80..99d8409af27327ad 100644 +--- a/sysdeps/x86_64/multiarch/strcmp-evex.S ++++ b/sysdeps/x86_64/multiarch/strcmp-evex.S +@@ -26,54 +26,69 @@ + + # define PAGE_SIZE 4096 + +-/* VEC_SIZE = Number of bytes in a ymm register */ ++ /* VEC_SIZE = Number of bytes in a ymm register. */ + # define VEC_SIZE 32 ++# define CHAR_PER_VEC (VEC_SIZE / SIZE_OF_CHAR) + +-/* Shift for dividing by (VEC_SIZE * 4). 
*/ +-# define DIVIDE_BY_VEC_4_SHIFT 7 +-# if (VEC_SIZE * 4) != (1 << DIVIDE_BY_VEC_4_SHIFT) +-# error (VEC_SIZE * 4) != (1 << DIVIDE_BY_VEC_4_SHIFT) +-# endif +- +-# define VMOVU vmovdqu64 +-# define VMOVA vmovdqa64 ++# define VMOVU vmovdqu64 ++# define VMOVA vmovdqa64 + + # ifdef USE_AS_WCSCMP +-/* Compare packed dwords. */ +-# define VPCMP vpcmpd ++# define TESTEQ subl $0xff, ++ /* Compare packed dwords. */ ++# define VPCMP vpcmpd + # define VPMINU vpminud + # define VPTESTM vptestmd +-# define SHIFT_REG32 r8d +-# define SHIFT_REG64 r8 +-/* 1 dword char == 4 bytes. */ ++ /* 1 dword char == 4 bytes. */ + # define SIZE_OF_CHAR 4 + # else +-/* Compare packed bytes. */ +-# define VPCMP vpcmpb ++# define TESTEQ incl ++ /* Compare packed bytes. */ ++# define VPCMP vpcmpb + # define VPMINU vpminub + # define VPTESTM vptestmb +-# define SHIFT_REG32 ecx +-# define SHIFT_REG64 rcx +-/* 1 byte char == 1 byte. */ ++ /* 1 byte char == 1 byte. */ + # define SIZE_OF_CHAR 1 + # endif + ++# ifdef USE_AS_STRNCMP ++# define LOOP_REG r9d ++# define LOOP_REG64 r9 ++ ++# define OFFSET_REG8 r9b ++# define OFFSET_REG r9d ++# define OFFSET_REG64 r9 ++# else ++# define LOOP_REG edx ++# define LOOP_REG64 rdx ++ ++# define OFFSET_REG8 dl ++# define OFFSET_REG edx ++# define OFFSET_REG64 rdx ++# endif ++ ++# if defined USE_AS_STRNCMP || defined USE_AS_WCSCMP ++# define VEC_OFFSET 0 ++# else ++# define VEC_OFFSET (-VEC_SIZE) ++# endif ++ + # define XMMZERO xmm16 +-# define XMM0 xmm17 +-# define XMM1 xmm18 ++# define XMM0 xmm17 ++# define XMM1 xmm18 + + # define YMMZERO ymm16 +-# define YMM0 ymm17 +-# define YMM1 ymm18 +-# define YMM2 ymm19 +-# define YMM3 ymm20 +-# define YMM4 ymm21 +-# define YMM5 ymm22 +-# define YMM6 ymm23 +-# define YMM7 ymm24 +-# define YMM8 ymm25 +-# define YMM9 ymm26 +-# define YMM10 ymm27 ++# define YMM0 ymm17 ++# define YMM1 ymm18 ++# define YMM2 ymm19 ++# define YMM3 ymm20 ++# define YMM4 ymm21 ++# define YMM5 ymm22 ++# define YMM6 ymm23 ++# define YMM7 ymm24 ++# define YMM8 ymm25 ++# define YMM9 ymm26 ++# define YMM10 ymm27 + + /* Warning! + wcscmp/wcsncmp have to use SIGNED comparison for elements. +@@ -96,985 +111,1096 @@ + the maximum offset is reached before a difference is found, zero is + returned. */ + +- .section .text.evex,"ax",@progbits +-ENTRY (STRCMP) ++ .section .text.evex, "ax", @progbits ++ENTRY(STRCMP) + # ifdef USE_AS_STRNCMP +- /* Check for simple cases (0 or 1) in offset. */ +- cmp $1, %RDX_LP +- je L(char0) +- jb L(zero) +-# ifdef USE_AS_WCSCMP +-# ifndef __ILP32__ +- movq %rdx, %rcx +- /* Check if length could overflow when multiplied by +- sizeof(wchar_t). Checking top 8 bits will cover all potential +- overflow cases as well as redirect cases where its impossible to +- length to bound a valid memory region. In these cases just use +- 'wcscmp'. */ +- shrq $56, %rcx +- jnz __wcscmp_evex +-# endif +- /* Convert units: from wide to byte char. */ +- shl $2, %RDX_LP ++# ifdef __ILP32__ ++ /* Clear the upper 32 bits. */ ++ movl %edx, %rdx + # endif +- /* Register %r11 tracks the maximum offset. */ +- mov %RDX_LP, %R11_LP ++ cmp $1, %RDX_LP ++ /* Signed comparison intentional. We use this branch to also ++ test cases where length >= 2^63. These very large sizes can be ++ handled with strcmp as there is no way for that length to ++ actually bound the buffer. */ ++ jle L(one_or_less) + # endif + movl %edi, %eax +- xorl %edx, %edx +- /* Make %XMMZERO (%YMMZERO) all zeros in this function. 
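
(Illustrative aside, not part of the patch: the semantics of the TESTEQ macro defined above, modeled in C; the helper names are made up.)

  /* For byte compares the k-mask has 32 meaningful bits, so `incl' wraps
     to 0 exactly when every position matched; for dword (wchar_t)
     compares only 8 mask bits are meaningful, so all-equal is mask == 0xff
     and `subl $0xff' yields 0 exactly then.  In both cases a nonzero
     result has its lowest set bit at the first bad position, ready for
     tzcnt.  */
  #include <stdint.h>
  #include <assert.h>

  static uint32_t testeq_bytes (uint32_t m) { return m + 1; }
  static uint32_t testeq_dwords (uint32_t m) { return m - 0xff; }

  int
  main (void)
  {
    assert (testeq_bytes (0xffffffffu) == 0);  /* all 32 bytes equal */
    assert (testeq_bytes (0xffffffefu) != 0);  /* stop at byte 4 */
    assert (testeq_dwords (0xffu) == 0);       /* all 8 dwords equal */
    assert (testeq_dwords (0xefu) != 0);       /* stop at dword 4 */
    return 0;
  }
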
*/ +- vpxorq %XMMZERO, %XMMZERO, %XMMZERO + orl %esi, %eax +- andl $(PAGE_SIZE - 1), %eax +- cmpl $(PAGE_SIZE - (VEC_SIZE * 4)), %eax +- jg L(cross_page) +- /* Start comparing 4 vectors. */ ++ /* Shift out the bits irrelivant to page boundary ([63:12]). */ ++ sall $20, %eax ++ /* Check if s1 or s2 may cross a page in next 4x VEC loads. */ ++ cmpl $((PAGE_SIZE -(VEC_SIZE * 4)) << 20), %eax ++ ja L(page_cross) ++ ++L(no_page_cross): ++ /* Safe to compare 4x vectors. */ + VMOVU (%rdi), %YMM0 +- +- /* Each bit set in K2 represents a non-null CHAR in YMM0. */ + VPTESTM %YMM0, %YMM0, %k2 +- + /* Each bit cleared in K1 represents a mismatch or a null CHAR + in YMM0 and 32 bytes at (%rsi). */ + VPCMP $0, (%rsi), %YMM0, %k1{%k2} +- + kmovd %k1, %ecx +-# ifdef USE_AS_WCSCMP +- subl $0xff, %ecx +-# else +- incl %ecx +-# endif +- je L(next_3_vectors) +- tzcntl %ecx, %edx +-# ifdef USE_AS_WCSCMP +- /* NB: Multiply wchar_t count by 4 to get the number of bytes. */ +- sall $2, %edx +-# endif + # ifdef USE_AS_STRNCMP +- /* Return 0 if the mismatched index (%rdx) is after the maximum +- offset (%r11). */ +- cmpq %r11, %rdx +- jae L(zero) ++ cmpq $CHAR_PER_VEC, %rdx ++ jbe L(vec_0_test_len) + # endif ++ ++ /* TESTEQ is `incl` for strcmp/strncmp and `subl $0xff` for ++ wcscmp/wcsncmp. */ ++ ++ /* All 1s represents all equals. TESTEQ will overflow to zero in ++ all equals case. Otherwise 1s will carry until position of first ++ mismatch. */ ++ TESTEQ %ecx ++ jz L(more_3x_vec) ++ ++ .p2align 4,, 4 ++L(return_vec_0): ++ tzcntl %ecx, %ecx + # ifdef USE_AS_WCSCMP ++ movl (%rdi, %rcx, SIZE_OF_CHAR), %edx + xorl %eax, %eax +- movl (%rdi, %rdx), %ecx +- cmpl (%rsi, %rdx), %ecx +- je L(return) +-L(wcscmp_return): ++ cmpl (%rsi, %rcx, SIZE_OF_CHAR), %edx ++ je L(ret0) + setl %al + negl %eax + orl $1, %eax +-L(return): + # else +- movzbl (%rdi, %rdx), %eax +- movzbl (%rsi, %rdx), %edx +- subl %edx, %eax ++ movzbl (%rdi, %rcx), %eax ++ movzbl (%rsi, %rcx), %ecx ++ subl %ecx, %eax + # endif ++L(ret0): + ret + +-L(return_vec_size): +- tzcntl %ecx, %edx +-# ifdef USE_AS_WCSCMP +- /* NB: Multiply wchar_t count by 4 to get the number of bytes. */ +- sall $2, %edx +-# endif + # ifdef USE_AS_STRNCMP +- /* Return 0 if the mismatched index (%rdx + VEC_SIZE) is after +- the maximum offset (%r11). */ +- addq $VEC_SIZE, %rdx +- cmpq %r11, %rdx +- jae L(zero) +-# ifdef USE_AS_WCSCMP ++ .p2align 4,, 4 ++L(vec_0_test_len): ++ notl %ecx ++ bzhil %edx, %ecx, %eax ++ jnz L(return_vec_0) ++ /* Align if will cross fetch block. */ ++ .p2align 4,, 2 ++L(ret_zero): + xorl %eax, %eax +- movl (%rdi, %rdx), %ecx +- cmpl (%rsi, %rdx), %ecx +- jne L(wcscmp_return) +-# else +- movzbl (%rdi, %rdx), %eax +- movzbl (%rsi, %rdx), %edx +- subl %edx, %eax +-# endif +-# else ++ ret ++ ++ .p2align 4,, 5 ++L(one_or_less): ++ jb L(ret_zero) + # ifdef USE_AS_WCSCMP ++ /* 'nbe' covers the case where length is negative (large ++ unsigned). */ ++ jnbe __wcscmp_evex ++ movl (%rdi), %edx + xorl %eax, %eax +- movl VEC_SIZE(%rdi, %rdx), %ecx +- cmpl VEC_SIZE(%rsi, %rdx), %ecx +- jne L(wcscmp_return) ++ cmpl (%rsi), %edx ++ je L(ret1) ++ setl %al ++ negl %eax ++ orl $1, %eax + # else +- movzbl VEC_SIZE(%rdi, %rdx), %eax +- movzbl VEC_SIZE(%rsi, %rdx), %edx +- subl %edx, %eax ++ /* 'nbe' covers the case where length is negative (large ++ unsigned). 
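
(Illustrative aside, not part of the patch: the `orl %esi, %eax; sall $20, %eax; cmpl ...; ja L(page_cross)` screen above, modeled in C; names are made up.)

  /* The page offsets of s1 and s2 are OR-ed and shifted so only offset
     bits 11:0 survive, scaled to the top of the register; the unsigned
     compare then trips whenever either string starts within 4 vector
     loads of a page end.  The OR over-approximates, which is why the slow
     path re-checks for a true cross.  */
  #include <stdint.h>
  #include <assert.h>

  #define PAGE_SIZE 4096
  #define VEC_SIZE 32

  static int
  may_cross (uintptr_t s1, uintptr_t s2)
  {
    uint32_t x = ((uint32_t) s1 | (uint32_t) s2) << 20;
    return x > ((uint32_t) (PAGE_SIZE - VEC_SIZE * 4) << 20);
  }

  int
  main (void)
  {
    assert (!may_cross (0x1000, 0x2000));
    assert (may_cross (0x1fe0, 0x2000));  /* 32 bytes left on s1's page */
    return 0;
  }
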
*/ ++ jnbe __strcmp_evex ++ movzbl (%rdi), %eax ++ movzbl (%rsi), %ecx ++ subl %ecx, %eax + # endif +-# endif ++L(ret1): + ret ++# endif + +-L(return_2_vec_size): +- tzcntl %ecx, %edx ++ .p2align 4,, 10 ++L(return_vec_1): ++ tzcntl %ecx, %ecx ++# ifdef USE_AS_STRNCMP ++ /* rdx must be > CHAR_PER_VEC so its safe to subtract without ++ worrying about underflow. */ ++ addq $-CHAR_PER_VEC, %rdx ++ cmpq %rcx, %rdx ++ jbe L(ret_zero) ++# endif + # ifdef USE_AS_WCSCMP +- /* NB: Multiply wchar_t count by 4 to get the number of bytes. */ +- sall $2, %edx ++ movl VEC_SIZE(%rdi, %rcx, SIZE_OF_CHAR), %edx ++ xorl %eax, %eax ++ cmpl VEC_SIZE(%rsi, %rcx, SIZE_OF_CHAR), %edx ++ je L(ret2) ++ setl %al ++ negl %eax ++ orl $1, %eax ++# else ++ movzbl VEC_SIZE(%rdi, %rcx), %eax ++ movzbl VEC_SIZE(%rsi, %rcx), %ecx ++ subl %ecx, %eax + # endif ++L(ret2): ++ ret ++ ++ .p2align 4,, 10 + # ifdef USE_AS_STRNCMP +- /* Return 0 if the mismatched index (%rdx + 2 * VEC_SIZE) is +- after the maximum offset (%r11). */ +- addq $(VEC_SIZE * 2), %rdx +- cmpq %r11, %rdx +- jae L(zero) +-# ifdef USE_AS_WCSCMP +- xorl %eax, %eax +- movl (%rdi, %rdx), %ecx +- cmpl (%rsi, %rdx), %ecx +- jne L(wcscmp_return) ++L(return_vec_3): ++# if CHAR_PER_VEC <= 16 ++ sall $CHAR_PER_VEC, %ecx + # else +- movzbl (%rdi, %rdx), %eax +- movzbl (%rsi, %rdx), %edx +- subl %edx, %eax ++ salq $CHAR_PER_VEC, %rcx + # endif ++# endif ++L(return_vec_2): ++# if (CHAR_PER_VEC <= 16) || !(defined USE_AS_STRNCMP) ++ tzcntl %ecx, %ecx + # else +-# ifdef USE_AS_WCSCMP +- xorl %eax, %eax +- movl (VEC_SIZE * 2)(%rdi, %rdx), %ecx +- cmpl (VEC_SIZE * 2)(%rsi, %rdx), %ecx +- jne L(wcscmp_return) +-# else +- movzbl (VEC_SIZE * 2)(%rdi, %rdx), %eax +- movzbl (VEC_SIZE * 2)(%rsi, %rdx), %edx +- subl %edx, %eax +-# endif ++ tzcntq %rcx, %rcx + # endif +- ret + +-L(return_3_vec_size): +- tzcntl %ecx, %edx +-# ifdef USE_AS_WCSCMP +- /* NB: Multiply wchar_t count by 4 to get the number of bytes. */ +- sall $2, %edx +-# endif + # ifdef USE_AS_STRNCMP +- /* Return 0 if the mismatched index (%rdx + 3 * VEC_SIZE) is +- after the maximum offset (%r11). 
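
(Illustrative aside, not part of the patch: the length screen formed by the signed `jle L(one_or_less)` at entry together with the unsigned `jnbe __strcmp_evex` above, modeled in C; the name is made up.)

  /* Reading the size_t length as signed folds three cases into one
     branch: n == 0 returns 0, n == 1 does a single-character compare, and
     n >= 2^63 cannot bound a real buffer, so it is forwarded to the
     unbounded strcmp.  */
  #include <stdint.h>
  #include <stddef.h>
  #include <assert.h>

  static int
  one_or_less (size_t n)
  {
    return (int64_t) n <= 1;    /* cmp $1, %RDX_LP; jle L(one_or_less) */
  }

  int
  main (void)
  {
    assert (one_or_less (0));
    assert (one_or_less (1));
    assert (!one_or_less (2));
    assert (one_or_less ((size_t) -1));  /* "negative": treat as unbounded */
    return 0;
  }
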
*/ +- addq $(VEC_SIZE * 3), %rdx +- cmpq %r11, %rdx +- jae L(zero) +-# ifdef USE_AS_WCSCMP ++ cmpq %rcx, %rdx ++ jbe L(ret_zero) ++# endif ++ ++# ifdef USE_AS_WCSCMP ++ movl (VEC_SIZE * 2)(%rdi, %rcx, SIZE_OF_CHAR), %edx + xorl %eax, %eax +- movl (%rdi, %rdx), %ecx +- cmpl (%rsi, %rdx), %ecx +- jne L(wcscmp_return) +-# else +- movzbl (%rdi, %rdx), %eax +- movzbl (%rsi, %rdx), %edx +- subl %edx, %eax +-# endif ++ cmpl (VEC_SIZE * 2)(%rsi, %rcx, SIZE_OF_CHAR), %edx ++ je L(ret3) ++ setl %al ++ negl %eax ++ orl $1, %eax + # else ++ movzbl (VEC_SIZE * 2)(%rdi, %rcx), %eax ++ movzbl (VEC_SIZE * 2)(%rsi, %rcx), %ecx ++ subl %ecx, %eax ++# endif ++L(ret3): ++ ret ++ ++# ifndef USE_AS_STRNCMP ++ .p2align 4,, 10 ++L(return_vec_3): ++ tzcntl %ecx, %ecx + # ifdef USE_AS_WCSCMP ++ movl (VEC_SIZE * 3)(%rdi, %rcx, SIZE_OF_CHAR), %edx + xorl %eax, %eax +- movl (VEC_SIZE * 3)(%rdi, %rdx), %ecx +- cmpl (VEC_SIZE * 3)(%rsi, %rdx), %ecx +- jne L(wcscmp_return) ++ cmpl (VEC_SIZE * 3)(%rsi, %rcx, SIZE_OF_CHAR), %edx ++ je L(ret4) ++ setl %al ++ negl %eax ++ orl $1, %eax + # else +- movzbl (VEC_SIZE * 3)(%rdi, %rdx), %eax +- movzbl (VEC_SIZE * 3)(%rsi, %rdx), %edx +- subl %edx, %eax ++ movzbl (VEC_SIZE * 3)(%rdi, %rcx), %eax ++ movzbl (VEC_SIZE * 3)(%rsi, %rcx), %ecx ++ subl %ecx, %eax + # endif +-# endif ++L(ret4): + ret ++# endif + +- .p2align 4 +-L(next_3_vectors): +- VMOVU VEC_SIZE(%rdi), %YMM0 +- /* Each bit set in K2 represents a non-null CHAR in YMM0. */ ++ /* 32 byte align here ensures the main loop is ideally aligned ++ for DSB. */ ++ .p2align 5 ++L(more_3x_vec): ++ /* Safe to compare 4x vectors. */ ++ VMOVU (VEC_SIZE)(%rdi), %YMM0 + VPTESTM %YMM0, %YMM0, %k2 +- /* Each bit cleared in K1 represents a mismatch or a null CHAR +- in YMM0 and 32 bytes at VEC_SIZE(%rsi). */ +- VPCMP $0, VEC_SIZE(%rsi), %YMM0, %k1{%k2} ++ VPCMP $0, (VEC_SIZE)(%rsi), %YMM0, %k1{%k2} + kmovd %k1, %ecx +-# ifdef USE_AS_WCSCMP +- subl $0xff, %ecx +-# else +- incl %ecx ++ TESTEQ %ecx ++ jnz L(return_vec_1) ++ ++# ifdef USE_AS_STRNCMP ++ subq $(CHAR_PER_VEC * 2), %rdx ++ jbe L(ret_zero) + # endif +- jne L(return_vec_size) + + VMOVU (VEC_SIZE * 2)(%rdi), %YMM0 +- /* Each bit set in K2 represents a non-null CHAR in YMM0. */ + VPTESTM %YMM0, %YMM0, %k2 +- /* Each bit cleared in K1 represents a mismatch or a null CHAR +- in YMM0 and 32 bytes at (VEC_SIZE * 2)(%rsi). */ + VPCMP $0, (VEC_SIZE * 2)(%rsi), %YMM0, %k1{%k2} + kmovd %k1, %ecx +-# ifdef USE_AS_WCSCMP +- subl $0xff, %ecx +-# else +- incl %ecx +-# endif +- jne L(return_2_vec_size) ++ TESTEQ %ecx ++ jnz L(return_vec_2) + + VMOVU (VEC_SIZE * 3)(%rdi), %YMM0 +- /* Each bit set in K2 represents a non-null CHAR in YMM0. */ + VPTESTM %YMM0, %YMM0, %k2 +- /* Each bit cleared in K1 represents a mismatch or a null CHAR +- in YMM0 and 32 bytes at (VEC_SIZE * 2)(%rsi). */ + VPCMP $0, (VEC_SIZE * 3)(%rsi), %YMM0, %k1{%k2} + kmovd %k1, %ecx ++ TESTEQ %ecx ++ jnz L(return_vec_3) ++ ++# ifdef USE_AS_STRNCMP ++ cmpq $(CHAR_PER_VEC * 2), %rdx ++ jbe L(ret_zero) ++# endif ++ ++ + # ifdef USE_AS_WCSCMP +- subl $0xff, %ecx ++ /* any non-zero positive value that doesn't inference with 0x1. ++ */ ++ movl $2, %r8d ++ + # else +- incl %ecx ++ xorl %r8d, %r8d + # endif +- jne L(return_3_vec_size) +-L(main_loop_header): +- leaq (VEC_SIZE * 4)(%rdi), %rdx +- movl $PAGE_SIZE, %ecx +- /* Align load via RAX. */ +- andq $-(VEC_SIZE * 4), %rdx +- subq %rdi, %rdx +- leaq (%rdi, %rdx), %rax ++ ++ /* The prepare labels are various entry points from the page ++ cross logic. 
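
(Illustrative aside, not part of the patch: why r8 is 2 in the wide-char setup above, and -4 once the page-cross code has swapped the strings; names are made up.)

  /* The wide-char return paths compute eax as 0 (greater) or -1 (less)
     with setl/negl and then XOR it with r8.  With r8 == 2 the sign is
     preserved (0^2 == 2 > 0, -1^2 == -3 < 0); with r8 == -4 it is flipped
     (0^-4 == -4 < 0, -1^-4 == 3 > 0), undoing the swap.  */
  #include <assert.h>

  static int
  wcs_ret (int less, int r8)   /* less: 1 if s1 < s2 at the mismatch */
  {
    int eax = -less;           /* setl %al; negl %eax */
    return eax ^ r8;           /* xorl %r8d, %eax */
  }

  int
  main (void)
  {
    assert (wcs_ret (1, 2) < 0 && wcs_ret (0, 2) > 0);    /* not swapped */
    assert (wcs_ret (1, -4) > 0 && wcs_ret (0, -4) < 0);  /* swapped */
    return 0;
  }
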
*/ ++L(prepare_loop): ++ + # ifdef USE_AS_STRNCMP +- /* Starting from this point, the maximum offset, or simply the +- 'offset', DECREASES by the same amount when base pointers are +- moved forward. Return 0 when: +- 1) On match: offset <= the matched vector index. +- 2) On mistmach, offset is before the mistmatched index. +- */ +- subq %rdx, %r11 +- jbe L(zero) ++# ifdef USE_AS_WCSCMP ++L(prepare_loop_no_len): ++ movl %edi, %ecx ++ andl $(VEC_SIZE * 4 - 1), %ecx ++ shrl $2, %ecx ++ leaq (CHAR_PER_VEC * 2)(%rdx, %rcx), %rdx ++# else ++ /* Store N + (VEC_SIZE * 4) and place check at the begining of ++ the loop. */ ++ leaq (VEC_SIZE * 2)(%rdi, %rdx), %rdx ++L(prepare_loop_no_len): ++# endif ++# else ++L(prepare_loop_no_len): + # endif +- addq %rsi, %rdx +- movq %rdx, %rsi +- andl $(PAGE_SIZE - 1), %esi +- /* Number of bytes before page crossing. */ +- subq %rsi, %rcx +- /* Number of VEC_SIZE * 4 blocks before page crossing. */ +- shrq $DIVIDE_BY_VEC_4_SHIFT, %rcx +- /* ESI: Number of VEC_SIZE * 4 blocks before page crossing. */ +- movl %ecx, %esi +- jmp L(loop_start) + ++ /* Align s1 and adjust s2 accordingly. */ ++ subq %rdi, %rsi ++ andq $-(VEC_SIZE * 4), %rdi ++L(prepare_loop_readj): ++ addq %rdi, %rsi ++# if (defined USE_AS_STRNCMP) && !(defined USE_AS_WCSCMP) ++ subq %rdi, %rdx ++# endif ++ ++L(prepare_loop_aligned): ++ /* eax stores distance from rsi to next page cross. These cases ++ need to be handled specially as the 4x loop could potentially ++ read memory past the length of s1 or s2 and across a page ++ boundary. */ ++ movl $-(VEC_SIZE * 4), %eax ++ subl %esi, %eax ++ andl $(PAGE_SIZE - 1), %eax ++ ++ vpxorq %YMMZERO, %YMMZERO, %YMMZERO ++ ++ /* Loop 4x comparisons at a time. */ + .p2align 4 + L(loop): ++ ++ /* End condition for strncmp. */ + # ifdef USE_AS_STRNCMP +- /* Base pointers are moved forward by 4 * VEC_SIZE. Decrease +- the maximum offset (%r11) by the same amount. */ +- subq $(VEC_SIZE * 4), %r11 +- jbe L(zero) ++ subq $(CHAR_PER_VEC * 4), %rdx ++ jbe L(ret_zero) + # endif +- addq $(VEC_SIZE * 4), %rax +- addq $(VEC_SIZE * 4), %rdx +-L(loop_start): +- testl %esi, %esi +- leal -1(%esi), %esi +- je L(loop_cross_page) +-L(back_to_loop): +- /* Main loop, comparing 4 vectors are a time. */ +- VMOVA (%rax), %YMM0 +- VMOVA VEC_SIZE(%rax), %YMM2 +- VMOVA (VEC_SIZE * 2)(%rax), %YMM4 +- VMOVA (VEC_SIZE * 3)(%rax), %YMM6 ++ ++ subq $-(VEC_SIZE * 4), %rdi ++ subq $-(VEC_SIZE * 4), %rsi ++ ++ /* Check if rsi loads will cross a page boundary. */ ++ addl $-(VEC_SIZE * 4), %eax ++ jnb L(page_cross_during_loop) ++ ++ /* Loop entry after handling page cross during loop. */ ++L(loop_skip_page_cross_check): ++ VMOVA (VEC_SIZE * 0)(%rdi), %YMM0 ++ VMOVA (VEC_SIZE * 1)(%rdi), %YMM2 ++ VMOVA (VEC_SIZE * 2)(%rdi), %YMM4 ++ VMOVA (VEC_SIZE * 3)(%rdi), %YMM6 + + VPMINU %YMM0, %YMM2, %YMM8 + VPMINU %YMM4, %YMM6, %YMM9 + +- /* A zero CHAR in YMM8 means that there is a null CHAR. */ +- VPMINU %YMM8, %YMM9, %YMM8 ++ /* A zero CHAR in YMM9 means that there is a null CHAR. */ ++ VPMINU %YMM8, %YMM9, %YMM9 + + /* Each bit set in K1 represents a non-null CHAR in YMM8. */ +- VPTESTM %YMM8, %YMM8, %k1 ++ VPTESTM %YMM9, %YMM9, %k1 + +- /* (YMM ^ YMM): A non-zero CHAR represents a mismatch. 
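
(Illustrative aside, not part of the patch: the pointer setup in L(prepare_loop) above, modeled in C; names and example addresses are made up.)

  /* Only s1 is aligned down to a 4 * VEC_SIZE boundary; s2 keeps its
     original distance from s1, so the loop issues aligned loads from s1
     and correspondingly offset unaligned loads from s2.  Re-reading the
     bytes before the original s1 is harmless since this point is only
     reached after they compared equal.  */
  #include <stdint.h>
  #include <stdio.h>

  static void
  prepare (const char **s1, const char **s2)
  {
    uintptr_t delta = (uintptr_t) *s2 - (uintptr_t) *s1; /* subq %rdi, %rsi */
    *s1 = (const char *) ((uintptr_t) *s1 & ~(uintptr_t) (32 * 4 - 1));
    *s2 = (const char *) ((uintptr_t) *s1 + delta);      /* addq %rdi, %rsi */
  }

  int
  main (void)
  {
    const char *a = (const char *) 0x10a7, *b = (const char *) 0x2003;
    prepare (&a, &b);
    printf ("%p %p\n", (void *) a, (void *) b);  /* 0x1080 0x1fdc */
    return 0;
  }
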
*/ +- vpxorq (%rdx), %YMM0, %YMM1 +- vpxorq VEC_SIZE(%rdx), %YMM2, %YMM3 +- vpxorq (VEC_SIZE * 2)(%rdx), %YMM4, %YMM5 +- vpxorq (VEC_SIZE * 3)(%rdx), %YMM6, %YMM7 ++ vpxorq (VEC_SIZE * 0)(%rsi), %YMM0, %YMM1 ++ vpxorq (VEC_SIZE * 1)(%rsi), %YMM2, %YMM3 ++ vpxorq (VEC_SIZE * 2)(%rsi), %YMM4, %YMM5 ++ /* Ternary logic to xor (VEC_SIZE * 3)(%rsi) with YMM6 while ++ oring with YMM1. Result is stored in YMM6. */ ++ vpternlogd $0xde, (VEC_SIZE * 3)(%rsi), %YMM1, %YMM6 + +- vporq %YMM1, %YMM3, %YMM9 +- vporq %YMM5, %YMM7, %YMM10 ++ /* Or together YMM3, YMM5, and YMM6. */ ++ vpternlogd $0xfe, %YMM3, %YMM5, %YMM6 + +- /* A non-zero CHAR in YMM9 represents a mismatch. */ +- vporq %YMM9, %YMM10, %YMM9 + +- /* Each bit cleared in K0 represents a mismatch or a null CHAR. */ +- VPCMP $0, %YMMZERO, %YMM9, %k0{%k1} +- kmovd %k0, %ecx +-# ifdef USE_AS_WCSCMP +- subl $0xff, %ecx +-# else +- incl %ecx +-# endif +- je L(loop) ++ /* A non-zero CHAR in YMM6 represents a mismatch. */ ++ VPCMP $0, %YMMZERO, %YMM6, %k0{%k1} ++ kmovd %k0, %LOOP_REG + +- /* Each bit set in K1 represents a non-null CHAR in YMM0. */ ++ TESTEQ %LOOP_REG ++ jz L(loop) ++ ++ ++ /* Find which VEC has the mismatch of end of string. */ + VPTESTM %YMM0, %YMM0, %k1 +- /* Each bit cleared in K0 represents a mismatch or a null CHAR +- in YMM0 and (%rdx). */ + VPCMP $0, %YMMZERO, %YMM1, %k0{%k1} + kmovd %k0, %ecx +-# ifdef USE_AS_WCSCMP +- subl $0xff, %ecx +-# else +- incl %ecx +-# endif +- je L(test_vec) +- tzcntl %ecx, %ecx +-# ifdef USE_AS_WCSCMP +- /* NB: Multiply wchar_t count by 4 to get the number of bytes. */ +- sall $2, %ecx +-# endif +-# ifdef USE_AS_STRNCMP +- cmpq %rcx, %r11 +- jbe L(zero) +-# ifdef USE_AS_WCSCMP +- movq %rax, %rsi +- xorl %eax, %eax +- movl (%rsi, %rcx), %edi +- cmpl (%rdx, %rcx), %edi +- jne L(wcscmp_return) +-# else +- movzbl (%rax, %rcx), %eax +- movzbl (%rdx, %rcx), %edx +- subl %edx, %eax +-# endif +-# else +-# ifdef USE_AS_WCSCMP +- movq %rax, %rsi +- xorl %eax, %eax +- movl (%rsi, %rcx), %edi +- cmpl (%rdx, %rcx), %edi +- jne L(wcscmp_return) +-# else +- movzbl (%rax, %rcx), %eax +- movzbl (%rdx, %rcx), %edx +- subl %edx, %eax +-# endif +-# endif +- ret ++ TESTEQ %ecx ++ jnz L(return_vec_0_end) + +- .p2align 4 +-L(test_vec): +-# ifdef USE_AS_STRNCMP +- /* The first vector matched. Return 0 if the maximum offset +- (%r11) <= VEC_SIZE. */ +- cmpq $VEC_SIZE, %r11 +- jbe L(zero) +-# endif +- /* Each bit set in K1 represents a non-null CHAR in YMM2. */ + VPTESTM %YMM2, %YMM2, %k1 +- /* Each bit cleared in K0 represents a mismatch or a null CHAR +- in YMM2 and VEC_SIZE(%rdx). */ + VPCMP $0, %YMMZERO, %YMM3, %k0{%k1} + kmovd %k0, %ecx +-# ifdef USE_AS_WCSCMP +- subl $0xff, %ecx +-# else +- incl %ecx +-# endif +- je L(test_2_vec) +- tzcntl %ecx, %edi +-# ifdef USE_AS_WCSCMP +- /* NB: Multiply wchar_t count by 4 to get the number of bytes. 
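
(Illustrative aside, not part of the patch: the vpternlogd immediates used in the loop above, modeled in C; names are made up. With AT&T operand order `vpternlogd $imm, src3, src2, dst`, bit i of each lane indexes the immediate as (dst << 2) | (src2 << 1) | src3.)

  #include <stdint.h>
  #include <assert.h>

  static uint32_t
  ternlog (uint32_t a, uint32_t b, uint32_t c, uint8_t imm)
  {
    uint32_t r = 0;
    for (int i = 0; i < 32; i++)
      {
        unsigned int idx = (((a >> i) & 1) << 2) | (((b >> i) & 1) << 1)
                           | ((c >> i) & 1);
        r |= (uint32_t) ((imm >> idx) & 1) << i;
      }
    return r;
  }

  int
  main (void)
  {
    uint32_t a = 0x12345678, b = 0x9abcdef0, c = 0x0f0f0f0f;
    /* 0xde: b | (a ^ c) -- the xor-with-memory folded into an or.  */
    assert (ternlog (a, b, c, 0xde) == (b | (a ^ c)));
    /* 0xfe: a | b | c -- the three-way or of the mismatch vectors.  */
    assert (ternlog (a, b, c, 0xfe) == (a | b | c));
    return 0;
  }
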
*/ +- sall $2, %edi +-# endif +-# ifdef USE_AS_STRNCMP +- addq $VEC_SIZE, %rdi +- cmpq %rdi, %r11 +- jbe L(zero) +-# ifdef USE_AS_WCSCMP +- movq %rax, %rsi +- xorl %eax, %eax +- movl (%rsi, %rdi), %ecx +- cmpl (%rdx, %rdi), %ecx +- jne L(wcscmp_return) +-# else +- movzbl (%rax, %rdi), %eax +- movzbl (%rdx, %rdi), %edx +- subl %edx, %eax +-# endif +-# else +-# ifdef USE_AS_WCSCMP +- movq %rax, %rsi +- xorl %eax, %eax +- movl VEC_SIZE(%rsi, %rdi), %ecx +- cmpl VEC_SIZE(%rdx, %rdi), %ecx +- jne L(wcscmp_return) +-# else +- movzbl VEC_SIZE(%rax, %rdi), %eax +- movzbl VEC_SIZE(%rdx, %rdi), %edx +- subl %edx, %eax +-# endif +-# endif +- ret ++ TESTEQ %ecx ++ jnz L(return_vec_1_end) + +- .p2align 4 +-L(test_2_vec): ++ ++ /* Handle VEC 2 and 3 without branches. */ ++L(return_vec_2_3_end): + # ifdef USE_AS_STRNCMP +- /* The first 2 vectors matched. Return 0 if the maximum offset +- (%r11) <= 2 * VEC_SIZE. */ +- cmpq $(VEC_SIZE * 2), %r11 +- jbe L(zero) ++ subq $(CHAR_PER_VEC * 2), %rdx ++ jbe L(ret_zero_end) + # endif +- /* Each bit set in K1 represents a non-null CHAR in YMM4. */ ++ + VPTESTM %YMM4, %YMM4, %k1 +- /* Each bit cleared in K0 represents a mismatch or a null CHAR +- in YMM4 and (VEC_SIZE * 2)(%rdx). */ + VPCMP $0, %YMMZERO, %YMM5, %k0{%k1} + kmovd %k0, %ecx +-# ifdef USE_AS_WCSCMP +- subl $0xff, %ecx ++ TESTEQ %ecx ++# if CHAR_PER_VEC <= 16 ++ sall $CHAR_PER_VEC, %LOOP_REG ++ orl %ecx, %LOOP_REG + # else +- incl %ecx ++ salq $CHAR_PER_VEC, %LOOP_REG64 ++ orq %rcx, %LOOP_REG64 ++# endif ++L(return_vec_3_end): ++ /* LOOP_REG contains matches for null/mismatch from the loop. If ++ VEC 0,1,and 2 all have no null and no mismatches then mismatch ++ must entirely be from VEC 3 which is fully represented by ++ LOOP_REG. */ ++# if CHAR_PER_VEC <= 16 ++ tzcntl %LOOP_REG, %LOOP_REG ++# else ++ tzcntq %LOOP_REG64, %LOOP_REG64 ++# endif ++# ifdef USE_AS_STRNCMP ++ cmpq %LOOP_REG64, %rdx ++ jbe L(ret_zero_end) + # endif +- je L(test_3_vec) +- tzcntl %ecx, %edi ++ + # ifdef USE_AS_WCSCMP +- /* NB: Multiply wchar_t count by 4 to get the number of bytes. */ +- sall $2, %edi ++ movl (VEC_SIZE * 2)(%rdi, %LOOP_REG64, SIZE_OF_CHAR), %ecx ++ xorl %eax, %eax ++ cmpl (VEC_SIZE * 2)(%rsi, %LOOP_REG64, SIZE_OF_CHAR), %ecx ++ je L(ret5) ++ setl %al ++ negl %eax ++ xorl %r8d, %eax ++# else ++ movzbl (VEC_SIZE * 2)(%rdi, %LOOP_REG64), %eax ++ movzbl (VEC_SIZE * 2)(%rsi, %LOOP_REG64), %ecx ++ subl %ecx, %eax ++ xorl %r8d, %eax ++ subl %r8d, %eax + # endif ++L(ret5): ++ ret ++ + # ifdef USE_AS_STRNCMP +- addq $(VEC_SIZE * 2), %rdi +- cmpq %rdi, %r11 +- jbe L(zero) +-# ifdef USE_AS_WCSCMP +- movq %rax, %rsi ++ .p2align 4,, 2 ++L(ret_zero_end): + xorl %eax, %eax +- movl (%rsi, %rdi), %ecx +- cmpl (%rdx, %rdi), %ecx +- jne L(wcscmp_return) ++ ret ++# endif ++ ++ ++ /* The L(return_vec_N_end) differ from L(return_vec_N) in that ++ they use the value of `r8` to negate the return value. This is ++ because the page cross logic can swap `rdi` and `rsi`. 
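
(Illustrative aside, not part of the patch: the shift/or merge feeding the single tzcnt in L(return_vec_3_end) above, modeled in C; the values are made up, and __builtin_ctzll is the gcc/clang counterpart of tzcnt.)

  /* After TESTEQ, a nonzero mask has its lowest set bit at the first bad
     lane.  VEC 3's mask is shifted up by one vector's worth of lanes and
     OR-ed with VEC 2's, so one tzcnt locates the first mismatch/null
     across both vectors.  */
  #include <stdint.h>
  #include <assert.h>

  int
  main (void)
  {
    uint32_t ok2 = 0xffffffef;        /* VEC 2: lane 4 is bad */
    uint32_t stops2 = ok2 + 1;        /* TESTEQ: lowest set bit is 4 */
    uint64_t stops3 = 0;              /* VEC 3: clean */
    uint64_t merged = (stops3 << 32) | stops2;
    assert (__builtin_ctzll (merged) == 4);
    return 0;
  }
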
*/ ++ .p2align 4,, 10 ++# ifdef USE_AS_STRNCMP ++L(return_vec_1_end): ++# if CHAR_PER_VEC <= 16 ++ sall $CHAR_PER_VEC, %ecx + # else +- movzbl (%rax, %rdi), %eax +- movzbl (%rdx, %rdi), %edx +- subl %edx, %eax ++ salq $CHAR_PER_VEC, %rcx + # endif ++# endif ++L(return_vec_0_end): ++# if (CHAR_PER_VEC <= 16) || !(defined USE_AS_STRNCMP) ++ tzcntl %ecx, %ecx + # else +-# ifdef USE_AS_WCSCMP +- movq %rax, %rsi +- xorl %eax, %eax +- movl (VEC_SIZE * 2)(%rsi, %rdi), %ecx +- cmpl (VEC_SIZE * 2)(%rdx, %rdi), %ecx +- jne L(wcscmp_return) +-# else +- movzbl (VEC_SIZE * 2)(%rax, %rdi), %eax +- movzbl (VEC_SIZE * 2)(%rdx, %rdi), %edx +- subl %edx, %eax +-# endif ++ tzcntq %rcx, %rcx + # endif +- ret + +- .p2align 4 +-L(test_3_vec): + # ifdef USE_AS_STRNCMP +- /* The first 3 vectors matched. Return 0 if the maximum offset +- (%r11) <= 3 * VEC_SIZE. */ +- cmpq $(VEC_SIZE * 3), %r11 +- jbe L(zero) ++ cmpq %rcx, %rdx ++ jbe L(ret_zero_end) + # endif +- /* Each bit set in K1 represents a non-null CHAR in YMM6. */ +- VPTESTM %YMM6, %YMM6, %k1 +- /* Each bit cleared in K0 represents a mismatch or a null CHAR +- in YMM6 and (VEC_SIZE * 3)(%rdx). */ +- VPCMP $0, %YMMZERO, %YMM7, %k0{%k1} +- kmovd %k0, %ecx ++ + # ifdef USE_AS_WCSCMP +- subl $0xff, %ecx ++ movl (%rdi, %rcx, SIZE_OF_CHAR), %edx ++ xorl %eax, %eax ++ cmpl (%rsi, %rcx, SIZE_OF_CHAR), %edx ++ je L(ret6) ++ setl %al ++ negl %eax ++ /* This is the non-zero case for `eax` so just xorl with `r8d` ++ flip is `rdi` and `rsi` where swapped. */ ++ xorl %r8d, %eax + # else +- incl %ecx ++ movzbl (%rdi, %rcx), %eax ++ movzbl (%rsi, %rcx), %ecx ++ subl %ecx, %eax ++ /* Flip `eax` if `rdi` and `rsi` where swapped in page cross ++ logic. Subtract `r8d` after xor for zero case. */ ++ xorl %r8d, %eax ++ subl %r8d, %eax + # endif ++L(ret6): ++ ret ++ ++# ifndef USE_AS_STRNCMP ++ .p2align 4,, 10 ++L(return_vec_1_end): + tzcntl %ecx, %ecx +-# ifdef USE_AS_WCSCMP +- /* NB: Multiply wchar_t count by 4 to get the number of bytes. */ +- sall $2, %ecx +-# endif +-# ifdef USE_AS_STRNCMP +- addq $(VEC_SIZE * 3), %rcx +- cmpq %rcx, %r11 +- jbe L(zero) + # ifdef USE_AS_WCSCMP +- movq %rax, %rsi ++ movl VEC_SIZE(%rdi, %rcx, SIZE_OF_CHAR), %edx + xorl %eax, %eax +- movl (%rsi, %rcx), %esi +- cmpl (%rdx, %rcx), %esi +- jne L(wcscmp_return) +-# else +- movzbl (%rax, %rcx), %eax +- movzbl (%rdx, %rcx), %edx +- subl %edx, %eax +-# endif +-# else +-# ifdef USE_AS_WCSCMP +- movq %rax, %rsi +- xorl %eax, %eax +- movl (VEC_SIZE * 3)(%rsi, %rcx), %esi +- cmpl (VEC_SIZE * 3)(%rdx, %rcx), %esi +- jne L(wcscmp_return) ++ cmpl VEC_SIZE(%rsi, %rcx, SIZE_OF_CHAR), %edx ++ je L(ret7) ++ setl %al ++ negl %eax ++ xorl %r8d, %eax + # else +- movzbl (VEC_SIZE * 3)(%rax, %rcx), %eax +- movzbl (VEC_SIZE * 3)(%rdx, %rcx), %edx +- subl %edx, %eax ++ movzbl VEC_SIZE(%rdi, %rcx), %eax ++ movzbl VEC_SIZE(%rsi, %rcx), %ecx ++ subl %ecx, %eax ++ xorl %r8d, %eax ++ subl %r8d, %eax + # endif +-# endif ++L(ret7): + ret +- +- .p2align 4 +-L(loop_cross_page): +- xorl %r10d, %r10d +- movq %rdx, %rcx +- /* Align load via RDX. We load the extra ECX bytes which should +- be ignored. */ +- andl $((VEC_SIZE * 4) - 1), %ecx +- /* R10 is -RCX. */ +- subq %rcx, %r10 +- +- /* This works only if VEC_SIZE * 2 == 64. */ +-# if (VEC_SIZE * 2) != 64 +-# error (VEC_SIZE * 2) != 64 + # endif + +- /* Check if the first VEC_SIZE * 2 bytes should be ignored. */ +- cmpl $(VEC_SIZE * 2), %ecx +- jge L(loop_cross_page_2_vec) + +- VMOVU (%rax, %r10), %YMM2 +- VMOVU VEC_SIZE(%rax, %r10), %YMM3 ++ /* Page cross in rsi in next 4x VEC. 
*/ + +- /* Each bit set in K2 represents a non-null CHAR in YMM2. */ +- VPTESTM %YMM2, %YMM2, %k2 +- /* Each bit cleared in K1 represents a mismatch or a null CHAR +- in YMM2 and 32 bytes at (%rdx, %r10). */ +- VPCMP $0, (%rdx, %r10), %YMM2, %k1{%k2} +- kmovd %k1, %r9d +- /* Don't use subl since it is the lower 16/32 bits of RDI +- below. */ +- notl %r9d +-# ifdef USE_AS_WCSCMP +- /* Only last 8 bits are valid. */ +- andl $0xff, %r9d +-# endif ++ /* TODO: Improve logic here. */ ++ .p2align 4,, 10 ++L(page_cross_during_loop): ++ /* eax contains [distance_from_page - (VEC_SIZE * 4)]. */ + +- /* Each bit set in K4 represents a non-null CHAR in YMM3. */ +- VPTESTM %YMM3, %YMM3, %k4 +- /* Each bit cleared in K3 represents a mismatch or a null CHAR +- in YMM3 and 32 bytes at VEC_SIZE(%rdx, %r10). */ +- VPCMP $0, VEC_SIZE(%rdx, %r10), %YMM3, %k3{%k4} +- kmovd %k3, %edi +- /* Must use notl %edi here as lower bits are for CHAR +- comparisons potentially out of range thus can be 0 without +- indicating mismatch. */ +- notl %edi +-# ifdef USE_AS_WCSCMP +- /* Don't use subl since it is the upper 8 bits of EDI below. */ +- andl $0xff, %edi +-# endif ++ /* Optimistically rsi and rdi and both aligned in which case we ++ don't need any logic here. */ ++ cmpl $-(VEC_SIZE * 4), %eax ++ /* Don't adjust eax before jumping back to loop and we will ++ never hit page cross case again. */ ++ je L(loop_skip_page_cross_check) + +-# ifdef USE_AS_WCSCMP +- /* NB: Each bit in EDI/R9D represents 4-byte element. */ +- sall $8, %edi +- /* NB: Divide shift count by 4 since each bit in K1 represent 4 +- bytes. */ +- movl %ecx, %SHIFT_REG32 +- sarl $2, %SHIFT_REG32 +- +- /* Each bit in EDI represents a null CHAR or a mismatch. */ +- orl %r9d, %edi +-# else +- salq $32, %rdi ++ /* Check if we can safely load a VEC. */ ++ cmpl $-(VEC_SIZE * 3), %eax ++ jle L(less_1x_vec_till_page_cross) + +- /* Each bit in RDI represents a null CHAR or a mismatch. */ +- orq %r9, %rdi +-# endif ++ VMOVA (%rdi), %YMM0 ++ VPTESTM %YMM0, %YMM0, %k2 ++ VPCMP $0, (%rsi), %YMM0, %k1{%k2} ++ kmovd %k1, %ecx ++ TESTEQ %ecx ++ jnz L(return_vec_0_end) ++ ++ /* if distance >= 2x VEC then eax > -(VEC_SIZE * 2). */ ++ cmpl $-(VEC_SIZE * 2), %eax ++ jg L(more_2x_vec_till_page_cross) ++ ++ .p2align 4,, 4 ++L(less_1x_vec_till_page_cross): ++ subl $-(VEC_SIZE * 4), %eax ++ /* Guranteed safe to read from rdi - VEC_SIZE here. The only ++ concerning case is first iteration if incoming s1 was near start ++ of a page and s2 near end. If s1 was near the start of the page ++ we already aligned up to nearest VEC_SIZE * 4 so gurnateed safe ++ to read back -VEC_SIZE. If rdi is truly at the start of a page ++ here, it means the previous page (rdi - VEC_SIZE) has already ++ been loaded earlier so must be valid. */ ++ VMOVU -VEC_SIZE(%rdi, %rax), %YMM0 ++ VPTESTM %YMM0, %YMM0, %k2 ++ VPCMP $0, -VEC_SIZE(%rsi, %rax), %YMM0, %k1{%k2} ++ ++ /* Mask of potentially valid bits. The lower bits can be out of ++ range comparisons (but safe regarding page crosses). */ + +- /* Since ECX < VEC_SIZE * 2, simply skip the first ECX bytes. */ +- shrxq %SHIFT_REG64, %rdi, %rdi +- testq %rdi, %rdi +- je L(loop_cross_page_2_vec) +- tzcntq %rdi, %rcx + # ifdef USE_AS_WCSCMP +- /* NB: Multiply wchar_t count by 4 to get the number of bytes. 
*/ +- sall $2, %ecx ++ movl $-1, %r10d ++ movl %esi, %ecx ++ andl $(VEC_SIZE - 1), %ecx ++ shrl $2, %ecx ++ shlxl %ecx, %r10d, %ecx ++ movzbl %cl, %r10d ++# else ++ movl $-1, %ecx ++ shlxl %esi, %ecx, %r10d + # endif ++ ++ kmovd %k1, %ecx ++ notl %ecx ++ ++ + # ifdef USE_AS_STRNCMP +- cmpq %rcx, %r11 +- jbe L(zero) + # ifdef USE_AS_WCSCMP +- movq %rax, %rsi +- xorl %eax, %eax +- movl (%rsi, %rcx), %edi +- cmpl (%rdx, %rcx), %edi +- jne L(wcscmp_return) ++ movl %eax, %r11d ++ shrl $2, %r11d ++ cmpq %r11, %rdx + # else +- movzbl (%rax, %rcx), %eax +- movzbl (%rdx, %rcx), %edx +- subl %edx, %eax ++ cmpq %rax, %rdx + # endif ++ jbe L(return_page_cross_end_check) ++# endif ++ movl %eax, %OFFSET_REG ++ ++ /* Readjust eax before potentially returning to the loop. */ ++ addl $(PAGE_SIZE - VEC_SIZE * 4), %eax ++ ++ andl %r10d, %ecx ++ jz L(loop_skip_page_cross_check) ++ ++ .p2align 4,, 3 ++L(return_page_cross_end): ++ tzcntl %ecx, %ecx ++ ++# if (defined USE_AS_STRNCMP) || (defined USE_AS_WCSCMP) ++ leal -VEC_SIZE(%OFFSET_REG64, %rcx, SIZE_OF_CHAR), %ecx ++L(return_page_cross_cmp_mem): + # else +-# ifdef USE_AS_WCSCMP +- movq %rax, %rsi ++ addl %OFFSET_REG, %ecx ++# endif ++# ifdef USE_AS_WCSCMP ++ movl VEC_OFFSET(%rdi, %rcx), %edx + xorl %eax, %eax +- movl (%rsi, %rcx), %edi +- cmpl (%rdx, %rcx), %edi +- jne L(wcscmp_return) +-# else +- movzbl (%rax, %rcx), %eax +- movzbl (%rdx, %rcx), %edx +- subl %edx, %eax +-# endif ++ cmpl VEC_OFFSET(%rsi, %rcx), %edx ++ je L(ret8) ++ setl %al ++ negl %eax ++ xorl %r8d, %eax ++# else ++ movzbl VEC_OFFSET(%rdi, %rcx), %eax ++ movzbl VEC_OFFSET(%rsi, %rcx), %ecx ++ subl %ecx, %eax ++ xorl %r8d, %eax ++ subl %r8d, %eax + # endif ++L(ret8): + ret + +- .p2align 4 +-L(loop_cross_page_2_vec): +- /* The first VEC_SIZE * 2 bytes match or are ignored. */ +- VMOVU (VEC_SIZE * 2)(%rax, %r10), %YMM0 +- VMOVU (VEC_SIZE * 3)(%rax, %r10), %YMM1 ++# ifdef USE_AS_STRNCMP ++ .p2align 4,, 10 ++L(return_page_cross_end_check): ++ tzcntl %ecx, %ecx ++ leal -VEC_SIZE(%rax, %rcx, SIZE_OF_CHAR), %ecx ++# ifdef USE_AS_WCSCMP ++ sall $2, %edx ++# endif ++ cmpl %ecx, %edx ++ ja L(return_page_cross_cmp_mem) ++ xorl %eax, %eax ++ ret ++# endif ++ + ++ .p2align 4,, 10 ++L(more_2x_vec_till_page_cross): ++ /* If more 2x vec till cross we will complete a full loop ++ iteration here. */ ++ ++ VMOVA VEC_SIZE(%rdi), %YMM0 + VPTESTM %YMM0, %YMM0, %k2 +- /* Each bit cleared in K1 represents a mismatch or a null CHAR +- in YMM0 and 32 bytes at (VEC_SIZE * 2)(%rdx, %r10). */ +- VPCMP $0, (VEC_SIZE * 2)(%rdx, %r10), %YMM0, %k1{%k2} +- kmovd %k1, %r9d +- /* Don't use subl since it is the lower 16/32 bits of RDI +- below. */ +- notl %r9d +-# ifdef USE_AS_WCSCMP +- /* Only last 8 bits are valid. */ +- andl $0xff, %r9d +-# endif ++ VPCMP $0, VEC_SIZE(%rsi), %YMM0, %k1{%k2} ++ kmovd %k1, %ecx ++ TESTEQ %ecx ++ jnz L(return_vec_1_end) + +- VPTESTM %YMM1, %YMM1, %k4 +- /* Each bit cleared in K3 represents a mismatch or a null CHAR +- in YMM1 and 32 bytes at (VEC_SIZE * 3)(%rdx, %r10). */ +- VPCMP $0, (VEC_SIZE * 3)(%rdx, %r10), %YMM1, %k3{%k4} +- kmovd %k3, %edi +- /* Must use notl %edi here as lower bits are for CHAR +- comparisons potentially out of range thus can be 0 without +- indicating mismatch. */ +- notl %edi +-# ifdef USE_AS_WCSCMP +- /* Don't use subl since it is the upper 8 bits of EDI below. 
*/ +- andl $0xff, %edi ++# ifdef USE_AS_STRNCMP ++ cmpq $(CHAR_PER_VEC * 2), %rdx ++ jbe L(ret_zero_in_loop_page_cross) + # endif + +-# ifdef USE_AS_WCSCMP +- /* NB: Each bit in EDI/R9D represents 4-byte element. */ +- sall $8, %edi ++ subl $-(VEC_SIZE * 4), %eax + +- /* Each bit in EDI represents a null CHAR or a mismatch. */ +- orl %r9d, %edi +-# else +- salq $32, %rdi ++ /* Safe to include comparisons from lower bytes. */ ++ VMOVU -(VEC_SIZE * 2)(%rdi, %rax), %YMM0 ++ VPTESTM %YMM0, %YMM0, %k2 ++ VPCMP $0, -(VEC_SIZE * 2)(%rsi, %rax), %YMM0, %k1{%k2} ++ kmovd %k1, %ecx ++ TESTEQ %ecx ++ jnz L(return_vec_page_cross_0) ++ ++ VMOVU -(VEC_SIZE * 1)(%rdi, %rax), %YMM0 ++ VPTESTM %YMM0, %YMM0, %k2 ++ VPCMP $0, -(VEC_SIZE * 1)(%rsi, %rax), %YMM0, %k1{%k2} ++ kmovd %k1, %ecx ++ TESTEQ %ecx ++ jnz L(return_vec_page_cross_1) + +- /* Each bit in RDI represents a null CHAR or a mismatch. */ +- orq %r9, %rdi ++# ifdef USE_AS_STRNCMP ++ /* Must check length here as length might proclude reading next ++ page. */ ++# ifdef USE_AS_WCSCMP ++ movl %eax, %r11d ++ shrl $2, %r11d ++ cmpq %r11, %rdx ++# else ++ cmpq %rax, %rdx ++# endif ++ jbe L(ret_zero_in_loop_page_cross) + # endif + +- xorl %r8d, %r8d +- /* If ECX > VEC_SIZE * 2, skip ECX - (VEC_SIZE * 2) bytes. */ +- subl $(VEC_SIZE * 2), %ecx +- jle 1f +- /* R8 has number of bytes skipped. */ +- movl %ecx, %r8d +-# ifdef USE_AS_WCSCMP +- /* NB: Divide shift count by 4 since each bit in RDI represent 4 +- bytes. */ +- sarl $2, %ecx +- /* Skip ECX bytes. */ +- shrl %cl, %edi ++ /* Finish the loop. */ ++ VMOVA (VEC_SIZE * 2)(%rdi), %YMM4 ++ VMOVA (VEC_SIZE * 3)(%rdi), %YMM6 ++ VPMINU %YMM4, %YMM6, %YMM9 ++ VPTESTM %YMM9, %YMM9, %k1 ++ ++ vpxorq (VEC_SIZE * 2)(%rsi), %YMM4, %YMM5 ++ /* YMM6 = YMM5 | ((VEC_SIZE * 3)(%rsi) ^ YMM6). */ ++ vpternlogd $0xde, (VEC_SIZE * 3)(%rsi), %YMM5, %YMM6 ++ ++ VPCMP $0, %YMMZERO, %YMM6, %k0{%k1} ++ kmovd %k0, %LOOP_REG ++ TESTEQ %LOOP_REG ++ jnz L(return_vec_2_3_end) ++ ++ /* Best for code size to include ucond-jmp here. Would be faster ++ if this case is hot to duplicate the L(return_vec_2_3_end) code ++ as fall-through and have jump back to loop on mismatch ++ comparison. */ ++ subq $-(VEC_SIZE * 4), %rdi ++ subq $-(VEC_SIZE * 4), %rsi ++ addl $(PAGE_SIZE - VEC_SIZE * 8), %eax ++# ifdef USE_AS_STRNCMP ++ subq $(CHAR_PER_VEC * 4), %rdx ++ ja L(loop_skip_page_cross_check) ++L(ret_zero_in_loop_page_cross): ++ xorl %eax, %eax ++ ret + # else +- /* Skip ECX bytes. */ +- shrq %cl, %rdi ++ jmp L(loop_skip_page_cross_check) + # endif +-1: +- /* Before jumping back to the loop, set ESI to the number of +- VEC_SIZE * 4 blocks before page crossing. */ +- movl $(PAGE_SIZE / (VEC_SIZE * 4) - 1), %esi + +- testq %rdi, %rdi +-# ifdef USE_AS_STRNCMP +- /* At this point, if %rdi value is 0, it already tested +- VEC_SIZE*4+%r10 byte starting from %rax. This label +- checks whether strncmp maximum offset reached or not. */ +- je L(string_nbyte_offset_check) ++ ++ .p2align 4,, 10 ++L(return_vec_page_cross_0): ++ addl $-VEC_SIZE, %eax ++L(return_vec_page_cross_1): ++ tzcntl %ecx, %ecx ++# if defined USE_AS_STRNCMP || defined USE_AS_WCSCMP ++ leal -VEC_SIZE(%rax, %rcx, SIZE_OF_CHAR), %ecx ++# ifdef USE_AS_STRNCMP ++# ifdef USE_AS_WCSCMP ++ /* Must divide ecx instead of multiply rdx due to overflow. 
*/ ++ movl %ecx, %eax ++ shrl $2, %eax ++ cmpq %rax, %rdx ++# else ++ cmpq %rcx, %rdx ++# endif ++ jbe L(ret_zero_in_loop_page_cross) ++# endif + # else +- je L(back_to_loop) ++ addl %eax, %ecx + # endif +- tzcntq %rdi, %rcx ++ + # ifdef USE_AS_WCSCMP +- /* NB: Multiply wchar_t count by 4 to get the number of bytes. */ +- sall $2, %ecx +-# endif +- addq %r10, %rcx +- /* Adjust for number of bytes skipped. */ +- addq %r8, %rcx +-# ifdef USE_AS_STRNCMP +- addq $(VEC_SIZE * 2), %rcx +- subq %rcx, %r11 +- jbe L(zero) +-# ifdef USE_AS_WCSCMP +- movq %rax, %rsi ++ movl VEC_OFFSET(%rdi, %rcx), %edx + xorl %eax, %eax +- movl (%rsi, %rcx), %edi +- cmpl (%rdx, %rcx), %edi +- jne L(wcscmp_return) +-# else +- movzbl (%rax, %rcx), %eax +- movzbl (%rdx, %rcx), %edx +- subl %edx, %eax +-# endif ++ cmpl VEC_OFFSET(%rsi, %rcx), %edx ++ je L(ret9) ++ setl %al ++ negl %eax ++ xorl %r8d, %eax + # else +-# ifdef USE_AS_WCSCMP +- movq %rax, %rsi +- xorl %eax, %eax +- movl (VEC_SIZE * 2)(%rsi, %rcx), %edi +- cmpl (VEC_SIZE * 2)(%rdx, %rcx), %edi +- jne L(wcscmp_return) +-# else +- movzbl (VEC_SIZE * 2)(%rax, %rcx), %eax +- movzbl (VEC_SIZE * 2)(%rdx, %rcx), %edx +- subl %edx, %eax +-# endif ++ movzbl VEC_OFFSET(%rdi, %rcx), %eax ++ movzbl VEC_OFFSET(%rsi, %rcx), %ecx ++ subl %ecx, %eax ++ xorl %r8d, %eax ++ subl %r8d, %eax + # endif ++L(ret9): + ret + +-# ifdef USE_AS_STRNCMP +-L(string_nbyte_offset_check): +- leaq (VEC_SIZE * 4)(%r10), %r10 +- cmpq %r10, %r11 +- jbe L(zero) +- jmp L(back_to_loop) ++ ++ .p2align 4,, 10 ++L(page_cross): ++# ifndef USE_AS_STRNCMP ++ /* If both are VEC aligned we don't need any special logic here. ++ Only valid for strcmp where stop condition is guranteed to be ++ reachable by just reading memory. */ ++ testl $((VEC_SIZE - 1) << 20), %eax ++ jz L(no_page_cross) + # endif + +- .p2align 4 +-L(cross_page_loop): +- /* Check one byte/dword at a time. */ ++ movl %edi, %eax ++ movl %esi, %ecx ++ andl $(PAGE_SIZE - 1), %eax ++ andl $(PAGE_SIZE - 1), %ecx ++ ++ xorl %OFFSET_REG, %OFFSET_REG ++ ++ /* Check which is closer to page cross, s1 or s2. */ ++ cmpl %eax, %ecx ++ jg L(page_cross_s2) ++ ++ /* The previous page cross check has false positives. Check for ++ true positive as page cross logic is very expensive. */ ++ subl $(PAGE_SIZE - VEC_SIZE * 4), %eax ++ jbe L(no_page_cross) ++ ++ ++ /* Set r8 to not interfere with normal return value (rdi and rsi ++ did not swap). */ + # ifdef USE_AS_WCSCMP +- cmpl %ecx, %eax ++ /* any non-zero positive value that doesn't inference with 0x1. ++ */ ++ movl $2, %r8d + # else +- subl %ecx, %eax ++ xorl %r8d, %r8d + # endif +- jne L(different) +- addl $SIZE_OF_CHAR, %edx +- cmpl $(VEC_SIZE * 4), %edx +- je L(main_loop_header) ++ ++ /* Check if less than 1x VEC till page cross. */ ++ subl $(VEC_SIZE * 3), %eax ++ jg L(less_1x_vec_till_page) ++ ++ ++ /* If more than 1x VEC till page cross, loop throuh safely ++ loadable memory until within 1x VEC of page cross. 
*/ ++ .p2align 4,, 8 ++L(page_cross_loop): ++ VMOVU (%rdi, %OFFSET_REG64, SIZE_OF_CHAR), %YMM0 ++ VPTESTM %YMM0, %YMM0, %k2 ++ VPCMP $0, (%rsi, %OFFSET_REG64, SIZE_OF_CHAR), %YMM0, %k1{%k2} ++ kmovd %k1, %ecx ++ TESTEQ %ecx ++ jnz L(check_ret_vec_page_cross) ++ addl $CHAR_PER_VEC, %OFFSET_REG + # ifdef USE_AS_STRNCMP +- cmpq %r11, %rdx +- jae L(zero) ++ cmpq %OFFSET_REG64, %rdx ++ jbe L(ret_zero_page_cross) + # endif ++ addl $VEC_SIZE, %eax ++ jl L(page_cross_loop) ++ + # ifdef USE_AS_WCSCMP +- movl (%rdi, %rdx), %eax +- movl (%rsi, %rdx), %ecx +-# else +- movzbl (%rdi, %rdx), %eax +- movzbl (%rsi, %rdx), %ecx ++ shrl $2, %eax + # endif +- /* Check null CHAR. */ +- testl %eax, %eax +- jne L(cross_page_loop) +- /* Since %eax == 0, subtract is OK for both SIGNED and UNSIGNED +- comparisons. */ +- subl %ecx, %eax +-# ifndef USE_AS_WCSCMP +-L(different): ++ ++ ++ subl %eax, %OFFSET_REG ++ /* OFFSET_REG has distance to page cross - VEC_SIZE. Guranteed ++ to not cross page so is safe to load. Since we have already ++ loaded at least 1 VEC from rsi it is also guranteed to be safe. ++ */ ++ VMOVU (%rdi, %OFFSET_REG64, SIZE_OF_CHAR), %YMM0 ++ VPTESTM %YMM0, %YMM0, %k2 ++ VPCMP $0, (%rsi, %OFFSET_REG64, SIZE_OF_CHAR), %YMM0, %k1{%k2} ++ ++ kmovd %k1, %ecx ++# ifdef USE_AS_STRNCMP ++ leal CHAR_PER_VEC(%OFFSET_REG64), %eax ++ cmpq %rax, %rdx ++ jbe L(check_ret_vec_page_cross2) ++# ifdef USE_AS_WCSCMP ++ addq $-(CHAR_PER_VEC * 2), %rdx ++# else ++ addq %rdi, %rdx ++# endif + # endif +- ret ++ TESTEQ %ecx ++ jz L(prepare_loop_no_len) + ++ .p2align 4,, 4 ++L(ret_vec_page_cross): ++# ifndef USE_AS_STRNCMP ++L(check_ret_vec_page_cross): ++# endif ++ tzcntl %ecx, %ecx ++ addl %OFFSET_REG, %ecx ++L(ret_vec_page_cross_cont): + # ifdef USE_AS_WCSCMP +- .p2align 4 +-L(different): +- /* Use movl to avoid modifying EFLAGS. */ +- movl $0, %eax ++ movl (%rdi, %rcx, SIZE_OF_CHAR), %edx ++ xorl %eax, %eax ++ cmpl (%rsi, %rcx, SIZE_OF_CHAR), %edx ++ je L(ret12) + setl %al + negl %eax +- orl $1, %eax +- ret ++ xorl %r8d, %eax ++# else ++ movzbl (%rdi, %rcx, SIZE_OF_CHAR), %eax ++ movzbl (%rsi, %rcx, SIZE_OF_CHAR), %ecx ++ subl %ecx, %eax ++ xorl %r8d, %eax ++ subl %r8d, %eax + # endif ++L(ret12): ++ ret ++ + + # ifdef USE_AS_STRNCMP +- .p2align 4 +-L(zero): ++ .p2align 4,, 10 ++L(check_ret_vec_page_cross2): ++ TESTEQ %ecx ++L(check_ret_vec_page_cross): ++ tzcntl %ecx, %ecx ++ addl %OFFSET_REG, %ecx ++ cmpq %rcx, %rdx ++ ja L(ret_vec_page_cross_cont) ++ .p2align 4,, 2 ++L(ret_zero_page_cross): + xorl %eax, %eax + ret ++# endif + +- .p2align 4 +-L(char0): +-# ifdef USE_AS_WCSCMP +- xorl %eax, %eax +- movl (%rdi), %ecx +- cmpl (%rsi), %ecx +- jne L(wcscmp_return) +-# else +- movzbl (%rsi), %ecx +- movzbl (%rdi), %eax +- subl %ecx, %eax +-# endif +- ret ++ .p2align 4,, 4 ++L(page_cross_s2): ++ /* Ensure this is a true page cross. */ ++ subl $(PAGE_SIZE - VEC_SIZE * 4), %ecx ++ jbe L(no_page_cross) ++ ++ ++ movl %ecx, %eax ++ movq %rdi, %rcx ++ movq %rsi, %rdi ++ movq %rcx, %rsi ++ ++ /* set r8 to negate return value as rdi and rsi swapped. */ ++# ifdef USE_AS_WCSCMP ++ movl $-4, %r8d ++# else ++ movl $-1, %r8d + # endif ++ xorl %OFFSET_REG, %OFFSET_REG + +- .p2align 4 +-L(last_vector): +- addq %rdx, %rdi +- addq %rdx, %rsi +-# ifdef USE_AS_STRNCMP +- subq %rdx, %r11 ++ /* Check if more than 1x VEC till page cross. 
*/ ++ subl $(VEC_SIZE * 3), %eax ++ jle L(page_cross_loop) ++ ++ .p2align 4,, 6 ++L(less_1x_vec_till_page): ++# ifdef USE_AS_WCSCMP ++ shrl $2, %eax + # endif +- tzcntl %ecx, %edx ++ /* Find largest load size we can use. */ ++ cmpl $(16 / SIZE_OF_CHAR), %eax ++ ja L(less_16_till_page) ++ ++ /* Use 16 byte comparison. */ ++ vmovdqu (%rdi), %xmm0 ++ VPTESTM %xmm0, %xmm0, %k2 ++ VPCMP $0, (%rsi), %xmm0, %k1{%k2} ++ kmovd %k1, %ecx + # ifdef USE_AS_WCSCMP +- /* NB: Multiply wchar_t count by 4 to get the number of bytes. */ +- sall $2, %edx ++ subl $0xf, %ecx ++# else ++ incw %cx + # endif ++ jnz L(check_ret_vec_page_cross) ++ movl $(16 / SIZE_OF_CHAR), %OFFSET_REG + # ifdef USE_AS_STRNCMP +- cmpq %r11, %rdx +- jae L(zero) ++ cmpq %OFFSET_REG64, %rdx ++ jbe L(ret_zero_page_cross_slow_case0) ++ subl %eax, %OFFSET_REG ++# else ++ /* Explicit check for 16 byte alignment. */ ++ subl %eax, %OFFSET_REG ++ jz L(prepare_loop) + # endif ++ vmovdqu (%rdi, %OFFSET_REG64, SIZE_OF_CHAR), %xmm0 ++ VPTESTM %xmm0, %xmm0, %k2 ++ VPCMP $0, (%rsi, %OFFSET_REG64, SIZE_OF_CHAR), %xmm0, %k1{%k2} ++ kmovd %k1, %ecx + # ifdef USE_AS_WCSCMP +- xorl %eax, %eax +- movl (%rdi, %rdx), %ecx +- cmpl (%rsi, %rdx), %ecx +- jne L(wcscmp_return) ++ subl $0xf, %ecx + # else +- movzbl (%rdi, %rdx), %eax +- movzbl (%rsi, %rdx), %edx +- subl %edx, %eax ++ incw %cx + # endif ++ jnz L(check_ret_vec_page_cross) ++# ifdef USE_AS_STRNCMP ++ addl $(16 / SIZE_OF_CHAR), %OFFSET_REG ++ subq %OFFSET_REG64, %rdx ++ jbe L(ret_zero_page_cross_slow_case0) ++ subq $-(CHAR_PER_VEC * 4), %rdx ++ ++ leaq -(VEC_SIZE * 4)(%rdi, %OFFSET_REG64, SIZE_OF_CHAR), %rdi ++ leaq -(VEC_SIZE * 4)(%rsi, %OFFSET_REG64, SIZE_OF_CHAR), %rsi ++# else ++ leaq (16 - VEC_SIZE * 4)(%rdi, %OFFSET_REG64, SIZE_OF_CHAR), %rdi ++ leaq (16 - VEC_SIZE * 4)(%rsi, %OFFSET_REG64, SIZE_OF_CHAR), %rsi ++# endif ++ jmp L(prepare_loop_aligned) ++ ++# ifdef USE_AS_STRNCMP ++ .p2align 4,, 2 ++L(ret_zero_page_cross_slow_case0): ++ xorl %eax, %eax + ret ++# endif + +- /* Comparing on page boundary region requires special treatment: +- It must done one vector at the time, starting with the wider +- ymm vector if possible, if not, with xmm. If fetching 16 bytes +- (xmm) still passes the boundary, byte comparison must be done. +- */ +- .p2align 4 +-L(cross_page): +- /* Try one ymm vector at a time. */ +- cmpl $(PAGE_SIZE - VEC_SIZE), %eax +- jg L(cross_page_1_vector) +-L(loop_1_vector): +- VMOVU (%rdi, %rdx), %YMM0 + +- VPTESTM %YMM0, %YMM0, %k2 +- /* Each bit cleared in K1 represents a mismatch or a null CHAR +- in YMM0 and 32 bytes at (%rsi, %rdx). */ +- VPCMP $0, (%rsi, %rdx), %YMM0, %k1{%k2} ++ .p2align 4,, 10 ++L(less_16_till_page): ++ cmpl $(24 / SIZE_OF_CHAR), %eax ++ ja L(less_8_till_page) ++ ++ /* Use 8 byte comparison. */ ++ vmovq (%rdi), %xmm0 ++ vmovq (%rsi), %xmm1 ++ VPTESTM %xmm0, %xmm0, %k2 ++ VPCMP $0, %xmm1, %xmm0, %k1{%k2} + kmovd %k1, %ecx + # ifdef USE_AS_WCSCMP +- subl $0xff, %ecx ++ subl $0x3, %ecx + # else +- incl %ecx ++ incb %cl + # endif +- jne L(last_vector) ++ jnz L(check_ret_vec_page_cross) + +- addl $VEC_SIZE, %edx + +- addl $VEC_SIZE, %eax + # ifdef USE_AS_STRNCMP +- /* Return 0 if the current offset (%rdx) >= the maximum offset +- (%r11). */ +- cmpq %r11, %rdx +- jae L(zero) ++ cmpq $(8 / SIZE_OF_CHAR), %rdx ++ jbe L(ret_zero_page_cross_slow_case0) + # endif +- cmpl $(PAGE_SIZE - VEC_SIZE), %eax +- jle L(loop_1_vector) +-L(cross_page_1_vector): +- /* Less than 32 bytes to check, try one xmm vector. 
*/ +- cmpl $(PAGE_SIZE - 16), %eax +- jg L(cross_page_1_xmm) +- VMOVU (%rdi, %rdx), %XMM0 ++ movl $(24 / SIZE_OF_CHAR), %OFFSET_REG ++ subl %eax, %OFFSET_REG + +- VPTESTM %YMM0, %YMM0, %k2 +- /* Each bit cleared in K1 represents a mismatch or a null CHAR +- in XMM0 and 16 bytes at (%rsi, %rdx). */ +- VPCMP $0, (%rsi, %rdx), %XMM0, %k1{%k2} ++ vmovq (%rdi, %OFFSET_REG64, SIZE_OF_CHAR), %xmm0 ++ vmovq (%rsi, %OFFSET_REG64, SIZE_OF_CHAR), %xmm1 ++ VPTESTM %xmm0, %xmm0, %k2 ++ VPCMP $0, %xmm1, %xmm0, %k1{%k2} + kmovd %k1, %ecx + # ifdef USE_AS_WCSCMP +- subl $0xf, %ecx ++ subl $0x3, %ecx + # else +- subl $0xffff, %ecx ++ incb %cl + # endif +- jne L(last_vector) ++ jnz L(check_ret_vec_page_cross) ++ + +- addl $16, %edx +-# ifndef USE_AS_WCSCMP +- addl $16, %eax +-# endif + # ifdef USE_AS_STRNCMP +- /* Return 0 if the current offset (%rdx) >= the maximum offset +- (%r11). */ +- cmpq %r11, %rdx +- jae L(zero) ++ addl $(8 / SIZE_OF_CHAR), %OFFSET_REG ++ subq %OFFSET_REG64, %rdx ++ jbe L(ret_zero_page_cross_slow_case0) ++ subq $-(CHAR_PER_VEC * 4), %rdx ++ ++ leaq -(VEC_SIZE * 4)(%rdi, %OFFSET_REG64, SIZE_OF_CHAR), %rdi ++ leaq -(VEC_SIZE * 4)(%rsi, %OFFSET_REG64, SIZE_OF_CHAR), %rsi ++# else ++ leaq (8 - VEC_SIZE * 4)(%rdi, %OFFSET_REG64, SIZE_OF_CHAR), %rdi ++ leaq (8 - VEC_SIZE * 4)(%rsi, %OFFSET_REG64, SIZE_OF_CHAR), %rsi + # endif ++ jmp L(prepare_loop_aligned) + +-L(cross_page_1_xmm): +-# ifndef USE_AS_WCSCMP +- /* Less than 16 bytes to check, try 8 byte vector. NB: No need +- for wcscmp nor wcsncmp since wide char is 4 bytes. */ +- cmpl $(PAGE_SIZE - 8), %eax +- jg L(cross_page_8bytes) +- vmovq (%rdi, %rdx), %XMM0 +- vmovq (%rsi, %rdx), %XMM1 + +- VPTESTM %YMM0, %YMM0, %k2 +- /* Each bit cleared in K1 represents a mismatch or a null CHAR +- in XMM0 and XMM1. */ +- VPCMP $0, %XMM1, %XMM0, %k1{%k2} +- kmovb %k1, %ecx ++ ++ ++ .p2align 4,, 10 ++L(less_8_till_page): + # ifdef USE_AS_WCSCMP +- subl $0x3, %ecx ++ /* If using wchar then this is the only check before we reach ++ the page boundary. */ ++ movl (%rdi), %eax ++ movl (%rsi), %ecx ++ cmpl %ecx, %eax ++ jnz L(ret_less_8_wcs) ++# ifdef USE_AS_STRNCMP ++ addq $-(CHAR_PER_VEC * 2), %rdx ++ /* We already checked for len <= 1 so cannot hit that case here. ++ */ ++# endif ++ testl %eax, %eax ++ jnz L(prepare_loop) ++ ret ++ ++ .p2align 4,, 8 ++L(ret_less_8_wcs): ++ setl %OFFSET_REG8 ++ negl %OFFSET_REG ++ movl %OFFSET_REG, %eax ++ xorl %r8d, %eax ++ ret ++ + # else +- subl $0xff, %ecx +-# endif +- jne L(last_vector) ++ cmpl $28, %eax ++ ja L(less_4_till_page) ++ ++ vmovd (%rdi), %xmm0 ++ vmovd (%rsi), %xmm1 ++ VPTESTM %xmm0, %xmm0, %k2 ++ VPCMP $0, %xmm1, %xmm0, %k1{%k2} ++ kmovd %k1, %ecx ++ subl $0xf, %ecx ++ jnz L(check_ret_vec_page_cross) + +- addl $8, %edx +- addl $8, %eax + # ifdef USE_AS_STRNCMP +- /* Return 0 if the current offset (%rdx) >= the maximum offset +- (%r11). */ +- cmpq %r11, %rdx +- jae L(zero) ++ cmpq $4, %rdx ++ jbe L(ret_zero_page_cross_slow_case1) + # endif ++ movl $(28 / SIZE_OF_CHAR), %OFFSET_REG ++ subl %eax, %OFFSET_REG + +-L(cross_page_8bytes): +- /* Less than 8 bytes to check, try 4 byte vector. */ +- cmpl $(PAGE_SIZE - 4), %eax +- jg L(cross_page_4bytes) +- vmovd (%rdi, %rdx), %XMM0 +- vmovd (%rsi, %rdx), %XMM1 +- +- VPTESTM %YMM0, %YMM0, %k2 +- /* Each bit cleared in K1 represents a mismatch or a null CHAR +- in XMM0 and XMM1. 
*/ +- VPCMP $0, %XMM1, %XMM0, %k1{%k2} ++ vmovd (%rdi, %OFFSET_REG64, SIZE_OF_CHAR), %xmm0 ++ vmovd (%rsi, %OFFSET_REG64, SIZE_OF_CHAR), %xmm1 ++ VPTESTM %xmm0, %xmm0, %k2 ++ VPCMP $0, %xmm1, %xmm0, %k1{%k2} + kmovd %k1, %ecx +-# ifdef USE_AS_WCSCMP +- subl $0x1, %ecx +-# else + subl $0xf, %ecx +-# endif +- jne L(last_vector) ++ jnz L(check_ret_vec_page_cross) ++# ifdef USE_AS_STRNCMP ++ addl $(4 / SIZE_OF_CHAR), %OFFSET_REG ++ subq %OFFSET_REG64, %rdx ++ jbe L(ret_zero_page_cross_slow_case1) ++ subq $-(CHAR_PER_VEC * 4), %rdx ++ ++ leaq -(VEC_SIZE * 4)(%rdi, %OFFSET_REG64, SIZE_OF_CHAR), %rdi ++ leaq -(VEC_SIZE * 4)(%rsi, %OFFSET_REG64, SIZE_OF_CHAR), %rsi ++# else ++ leaq (4 - VEC_SIZE * 4)(%rdi, %OFFSET_REG64, SIZE_OF_CHAR), %rdi ++ leaq (4 - VEC_SIZE * 4)(%rsi, %OFFSET_REG64, SIZE_OF_CHAR), %rsi ++# endif ++ jmp L(prepare_loop_aligned) ++ + +- addl $4, %edx + # ifdef USE_AS_STRNCMP +- /* Return 0 if the current offset (%rdx) >= the maximum offset +- (%r11). */ +- cmpq %r11, %rdx +- jae L(zero) ++ .p2align 4,, 2 ++L(ret_zero_page_cross_slow_case1): ++ xorl %eax, %eax ++ ret + # endif + +-L(cross_page_4bytes): +-# endif +- /* Less than 4 bytes to check, try one byte/dword at a time. */ +-# ifdef USE_AS_STRNCMP +- cmpq %r11, %rdx +- jae L(zero) +-# endif +-# ifdef USE_AS_WCSCMP +- movl (%rdi, %rdx), %eax +- movl (%rsi, %rdx), %ecx +-# else +- movzbl (%rdi, %rdx), %eax +- movzbl (%rsi, %rdx), %ecx +-# endif +- testl %eax, %eax +- jne L(cross_page_loop) ++ .p2align 4,, 10 ++L(less_4_till_page): ++ subq %rdi, %rsi ++ /* Extremely slow byte comparison loop. */ ++L(less_4_loop): ++ movzbl (%rdi), %eax ++ movzbl (%rsi, %rdi), %ecx + subl %ecx, %eax ++ jnz L(ret_less_4_loop) ++ testl %ecx, %ecx ++ jz L(ret_zero_4_loop) ++# ifdef USE_AS_STRNCMP ++ decq %rdx ++ jz L(ret_zero_4_loop) ++# endif ++ incq %rdi ++ /* end condition is reach page boundary (rdi is aligned). */ ++ testl $31, %edi ++ jnz L(less_4_loop) ++ leaq -(VEC_SIZE * 4)(%rdi, %rsi), %rsi ++ addq $-(VEC_SIZE * 4), %rdi ++# ifdef USE_AS_STRNCMP ++ subq $-(CHAR_PER_VEC * 4), %rdx ++# endif ++ jmp L(prepare_loop_aligned) ++ ++L(ret_zero_4_loop): ++ xorl %eax, %eax ++ ret ++L(ret_less_4_loop): ++ xorl %r8d, %eax ++ subl %r8d, %eax + ret +-END (STRCMP) ++# endif ++END(STRCMP) + #endif diff --git a/SOURCES/glibc-upstream-2.34-203.patch b/SOURCES/glibc-upstream-2.34-203.patch new file mode 100644 index 0000000..e45b588 --- /dev/null +++ b/SOURCES/glibc-upstream-2.34-203.patch @@ -0,0 +1,29 @@ +commit d299032743e05571ef326c838a5ecf6ef5b3e9c3 +Author: H.J. Lu +Date: Fri Feb 4 11:09:10 2022 -0800 + + x86-64: Fix strcmp-avx2.S + + Change "movl %edx, %rdx" to "movl %edx, %edx" in: + + commit b77b06e0e296f1a2276c27a67e1d44f2cfa38d45 + Author: Noah Goldstein + Date: Mon Jan 10 15:35:38 2022 -0600 + + x86: Optimize strcmp-avx2.S + + (cherry picked from commit c15efd011cea3d8f0494269eb539583215a1feed) + +diff --git a/sysdeps/x86_64/multiarch/strcmp-avx2.S b/sysdeps/x86_64/multiarch/strcmp-avx2.S +index a0d1c65db11028bc..cdded412a70bad10 100644 +--- a/sysdeps/x86_64/multiarch/strcmp-avx2.S ++++ b/sysdeps/x86_64/multiarch/strcmp-avx2.S +@@ -106,7 +106,7 @@ ENTRY(STRCMP) + # ifdef USE_AS_STRNCMP + # ifdef __ILP32__ + /* Clear the upper 32 bits. */ +- movl %edx, %rdx ++ movl %edx, %edx + # endif + cmp $1, %RDX_LP + /* Signed comparison intentional. 
We use this branch to also diff --git a/SOURCES/glibc-upstream-2.34-204.patch b/SOURCES/glibc-upstream-2.34-204.patch new file mode 100644 index 0000000..4250493 --- /dev/null +++ b/SOURCES/glibc-upstream-2.34-204.patch @@ -0,0 +1,29 @@ +commit 53ddafe917a8af17b16beb794c29e5b09b86d534 +Author: H.J. Lu +Date: Fri Feb 4 11:11:08 2022 -0800 + + x86-64: Fix strcmp-evex.S + + Change "movl %edx, %rdx" to "movl %edx, %edx" in: + + commit 8418eb3ff4b781d31c4ed5dc6c0bd7356bc45db9 + Author: Noah Goldstein + Date: Mon Jan 10 15:35:39 2022 -0600 + + x86: Optimize strcmp-evex.S + + (cherry picked from commit 0e0199a9e02ebe42e2b36958964d63f03573c382) + +diff --git a/sysdeps/x86_64/multiarch/strcmp-evex.S b/sysdeps/x86_64/multiarch/strcmp-evex.S +index 99d8409af27327ad..ed56af8ecdad48b2 100644 +--- a/sysdeps/x86_64/multiarch/strcmp-evex.S ++++ b/sysdeps/x86_64/multiarch/strcmp-evex.S +@@ -116,7 +116,7 @@ ENTRY(STRCMP) + # ifdef USE_AS_STRNCMP + # ifdef __ILP32__ + /* Clear the upper 32 bits. */ +- movl %edx, %rdx ++ movl %edx, %edx + # endif + cmp $1, %RDX_LP + /* Signed comparison intentional. We use this branch to also diff --git a/SOURCES/glibc-upstream-2.34-205.patch b/SOURCES/glibc-upstream-2.34-205.patch new file mode 100644 index 0000000..6cf18b8 --- /dev/null +++ b/SOURCES/glibc-upstream-2.34-205.patch @@ -0,0 +1,451 @@ +commit ea19c490a3f5628d55ded271cbb753e66b2f05e8 +Author: Noah Goldstein +Date: Sun Feb 6 00:54:18 2022 -0600 + + x86: Improve vec generation in memset-vec-unaligned-erms.S + + No bug. + + Split vec generation into multiple steps. This allows the + broadcast in AVX2 to use 'xmm' registers for the L(less_vec) + case. This saves an expensive lane-cross instruction and removes + the need for 'vzeroupper'. + + For SSE2 replace 2x 'punpck' instructions with zero-idiom 'pxor' for + byte broadcast. + + Results for memset-avx2 small (geomean of N = 20 benchset runs). + + size, New Time, Old Time, New / Old + 0, 4.100, 3.831, 0.934 + 1, 5.074, 4.399, 0.867 + 2, 4.433, 4.411, 0.995 + 4, 4.487, 4.415, 0.984 + 8, 4.454, 4.396, 0.987 + 16, 4.502, 4.443, 0.987 + + All relevant string/wcsmbs tests are passing. + Reviewed-by: H.J. 
Lu + + (cherry picked from commit b62ace2740a106222e124cc86956448fa07abf4d) + +diff --git a/sysdeps/x86_64/memset.S b/sysdeps/x86_64/memset.S +index 0137eba4cdd9f830..34ee0bfdcb81fb39 100644 +--- a/sysdeps/x86_64/memset.S ++++ b/sysdeps/x86_64/memset.S +@@ -28,17 +28,22 @@ + #define VMOVU movups + #define VMOVA movaps + +-#define MEMSET_VDUP_TO_VEC0_AND_SET_RETURN(d, r) \ ++# define MEMSET_SET_VEC0_AND_SET_RETURN(d, r) \ + movd d, %xmm0; \ +- movq r, %rax; \ +- punpcklbw %xmm0, %xmm0; \ +- punpcklwd %xmm0, %xmm0; \ +- pshufd $0, %xmm0, %xmm0 ++ pxor %xmm1, %xmm1; \ ++ pshufb %xmm1, %xmm0; \ ++ movq r, %rax + +-#define WMEMSET_VDUP_TO_VEC0_AND_SET_RETURN(d, r) \ ++# define WMEMSET_SET_VEC0_AND_SET_RETURN(d, r) \ + movd d, %xmm0; \ +- movq r, %rax; \ +- pshufd $0, %xmm0, %xmm0 ++ pshufd $0, %xmm0, %xmm0; \ ++ movq r, %rax ++ ++# define MEMSET_VDUP_TO_VEC0_HIGH() ++# define MEMSET_VDUP_TO_VEC0_LOW() ++ ++# define WMEMSET_VDUP_TO_VEC0_HIGH() ++# define WMEMSET_VDUP_TO_VEC0_LOW() + + #define SECTION(p) p + +diff --git a/sysdeps/x86_64/multiarch/memset-avx2-unaligned-erms.S b/sysdeps/x86_64/multiarch/memset-avx2-unaligned-erms.S +index 1af668af0aeda59e..c0bf2875d03d51ab 100644 +--- a/sysdeps/x86_64/multiarch/memset-avx2-unaligned-erms.S ++++ b/sysdeps/x86_64/multiarch/memset-avx2-unaligned-erms.S +@@ -10,15 +10,18 @@ + # define VMOVU vmovdqu + # define VMOVA vmovdqa + +-# define MEMSET_VDUP_TO_VEC0_AND_SET_RETURN(d, r) \ ++# define MEMSET_SET_VEC0_AND_SET_RETURN(d, r) \ + vmovd d, %xmm0; \ +- movq r, %rax; \ +- vpbroadcastb %xmm0, %ymm0 ++ movq r, %rax; + +-# define WMEMSET_VDUP_TO_VEC0_AND_SET_RETURN(d, r) \ +- vmovd d, %xmm0; \ +- movq r, %rax; \ +- vpbroadcastd %xmm0, %ymm0 ++# define WMEMSET_SET_VEC0_AND_SET_RETURN(d, r) \ ++ MEMSET_SET_VEC0_AND_SET_RETURN(d, r) ++ ++# define MEMSET_VDUP_TO_VEC0_HIGH() vpbroadcastb %xmm0, %ymm0 ++# define MEMSET_VDUP_TO_VEC0_LOW() vpbroadcastb %xmm0, %xmm0 ++ ++# define WMEMSET_VDUP_TO_VEC0_HIGH() vpbroadcastd %xmm0, %ymm0 ++# define WMEMSET_VDUP_TO_VEC0_LOW() vpbroadcastd %xmm0, %xmm0 + + # ifndef SECTION + # define SECTION(p) p##.avx +@@ -30,5 +33,6 @@ + # define WMEMSET_SYMBOL(p,s) p##_avx2_##s + # endif + ++# define USE_XMM_LESS_VEC + # include "memset-vec-unaligned-erms.S" + #endif +diff --git a/sysdeps/x86_64/multiarch/memset-avx512-unaligned-erms.S b/sysdeps/x86_64/multiarch/memset-avx512-unaligned-erms.S +index f14d6f8493c21a36..5241216a77bf72b7 100644 +--- a/sysdeps/x86_64/multiarch/memset-avx512-unaligned-erms.S ++++ b/sysdeps/x86_64/multiarch/memset-avx512-unaligned-erms.S +@@ -15,13 +15,19 @@ + + # define VZEROUPPER + +-# define MEMSET_VDUP_TO_VEC0_AND_SET_RETURN(d, r) \ +- movq r, %rax; \ +- vpbroadcastb d, %VEC0 ++# define MEMSET_SET_VEC0_AND_SET_RETURN(d, r) \ ++ vpbroadcastb d, %VEC0; \ ++ movq r, %rax + +-# define WMEMSET_VDUP_TO_VEC0_AND_SET_RETURN(d, r) \ +- movq r, %rax; \ +- vpbroadcastd d, %VEC0 ++# define WMEMSET_SET_VEC0_AND_SET_RETURN(d, r) \ ++ vpbroadcastd d, %VEC0; \ ++ movq r, %rax ++ ++# define MEMSET_VDUP_TO_VEC0_HIGH() ++# define MEMSET_VDUP_TO_VEC0_LOW() ++ ++# define WMEMSET_VDUP_TO_VEC0_HIGH() ++# define WMEMSET_VDUP_TO_VEC0_LOW() + + # define SECTION(p) p##.evex512 + # define MEMSET_SYMBOL(p,s) p##_avx512_##s +diff --git a/sysdeps/x86_64/multiarch/memset-evex-unaligned-erms.S b/sysdeps/x86_64/multiarch/memset-evex-unaligned-erms.S +index 64b09e77cc20cc42..637002150659123c 100644 +--- a/sysdeps/x86_64/multiarch/memset-evex-unaligned-erms.S ++++ b/sysdeps/x86_64/multiarch/memset-evex-unaligned-erms.S +@@ -15,13 +15,19 @@ + + 
# define VZEROUPPER + +-# define MEMSET_VDUP_TO_VEC0_AND_SET_RETURN(d, r) \ +- movq r, %rax; \ +- vpbroadcastb d, %VEC0 ++# define MEMSET_SET_VEC0_AND_SET_RETURN(d, r) \ ++ vpbroadcastb d, %VEC0; \ ++ movq r, %rax + +-# define WMEMSET_VDUP_TO_VEC0_AND_SET_RETURN(d, r) \ +- movq r, %rax; \ +- vpbroadcastd d, %VEC0 ++# define WMEMSET_SET_VEC0_AND_SET_RETURN(d, r) \ ++ vpbroadcastd d, %VEC0; \ ++ movq r, %rax ++ ++# define MEMSET_VDUP_TO_VEC0_HIGH() ++# define MEMSET_VDUP_TO_VEC0_LOW() ++ ++# define WMEMSET_VDUP_TO_VEC0_HIGH() ++# define WMEMSET_VDUP_TO_VEC0_LOW() + + # define SECTION(p) p##.evex + # define MEMSET_SYMBOL(p,s) p##_evex_##s +diff --git a/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S b/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S +index e723413a664c088f..c8db87dcbf69f0d8 100644 +--- a/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S ++++ b/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S +@@ -58,8 +58,10 @@ + #ifndef MOVQ + # if VEC_SIZE > 16 + # define MOVQ vmovq ++# define MOVD vmovd + # else + # define MOVQ movq ++# define MOVD movd + # endif + #endif + +@@ -72,9 +74,17 @@ + #if defined USE_WITH_EVEX || defined USE_WITH_AVX512 + # define END_REG rcx + # define LOOP_REG rdi ++# define LESS_VEC_REG rax + #else + # define END_REG rdi + # define LOOP_REG rdx ++# define LESS_VEC_REG rdi ++#endif ++ ++#ifdef USE_XMM_LESS_VEC ++# define XMM_SMALL 1 ++#else ++# define XMM_SMALL 0 + #endif + + #define PAGE_SIZE 4096 +@@ -110,8 +120,12 @@ END_CHK (WMEMSET_CHK_SYMBOL (__wmemset_chk, unaligned)) + + ENTRY (WMEMSET_SYMBOL (__wmemset, unaligned)) + shl $2, %RDX_LP +- WMEMSET_VDUP_TO_VEC0_AND_SET_RETURN (%esi, %rdi) +- jmp L(entry_from_bzero) ++ WMEMSET_SET_VEC0_AND_SET_RETURN (%esi, %rdi) ++ WMEMSET_VDUP_TO_VEC0_LOW() ++ cmpq $VEC_SIZE, %rdx ++ jb L(less_vec_no_vdup) ++ WMEMSET_VDUP_TO_VEC0_HIGH() ++ jmp L(entry_from_wmemset) + END (WMEMSET_SYMBOL (__wmemset, unaligned)) + #endif + +@@ -123,7 +137,7 @@ END_CHK (MEMSET_CHK_SYMBOL (__memset_chk, unaligned)) + #endif + + ENTRY (MEMSET_SYMBOL (__memset, unaligned)) +- MEMSET_VDUP_TO_VEC0_AND_SET_RETURN (%esi, %rdi) ++ MEMSET_SET_VEC0_AND_SET_RETURN (%esi, %rdi) + # ifdef __ILP32__ + /* Clear the upper 32 bits. */ + mov %edx, %edx +@@ -131,6 +145,8 @@ ENTRY (MEMSET_SYMBOL (__memset, unaligned)) + L(entry_from_bzero): + cmpq $VEC_SIZE, %rdx + jb L(less_vec) ++ MEMSET_VDUP_TO_VEC0_HIGH() ++L(entry_from_wmemset): + cmpq $(VEC_SIZE * 2), %rdx + ja L(more_2x_vec) + /* From VEC and to 2 * VEC. No branch when size == VEC_SIZE. */ +@@ -179,27 +195,27 @@ END_CHK (MEMSET_CHK_SYMBOL (__memset_chk, unaligned_erms)) + # endif + + ENTRY_P2ALIGN (MEMSET_SYMBOL (__memset, unaligned_erms), 6) +- MEMSET_VDUP_TO_VEC0_AND_SET_RETURN (%esi, %rdi) ++ MEMSET_SET_VEC0_AND_SET_RETURN (%esi, %rdi) + # ifdef __ILP32__ + /* Clear the upper 32 bits. */ + mov %edx, %edx + # endif + cmp $VEC_SIZE, %RDX_LP + jb L(less_vec) ++ MEMSET_VDUP_TO_VEC0_HIGH () + cmp $(VEC_SIZE * 2), %RDX_LP + ja L(stosb_more_2x_vec) +- /* From VEC and to 2 * VEC. No branch when size == VEC_SIZE. +- */ +- VMOVU %VEC(0), (%rax) +- VMOVU %VEC(0), -VEC_SIZE(%rax, %rdx) ++ /* From VEC and to 2 * VEC. No branch when size == VEC_SIZE. 
*/ ++ VMOVU %VEC(0), (%rdi) ++ VMOVU %VEC(0), (VEC_SIZE * -1)(%rdi, %rdx) + VZEROUPPER_RETURN + #endif + +- .p2align 4,, 10 ++ .p2align 4,, 4 + L(last_2x_vec): + #ifdef USE_LESS_VEC_MASK_STORE +- VMOVU %VEC(0), (VEC_SIZE * 2 + LOOP_4X_OFFSET)(%rcx) +- VMOVU %VEC(0), (VEC_SIZE * 3 + LOOP_4X_OFFSET)(%rcx) ++ VMOVU %VEC(0), (VEC_SIZE * -2)(%rdi, %rdx) ++ VMOVU %VEC(0), (VEC_SIZE * -1)(%rdi, %rdx) + #else + VMOVU %VEC(0), (VEC_SIZE * -2)(%rdi) + VMOVU %VEC(0), (VEC_SIZE * -1)(%rdi) +@@ -212,6 +228,7 @@ L(last_2x_vec): + #ifdef USE_LESS_VEC_MASK_STORE + .p2align 4,, 10 + L(less_vec): ++L(less_vec_no_vdup): + /* Less than 1 VEC. */ + # if VEC_SIZE != 16 && VEC_SIZE != 32 && VEC_SIZE != 64 + # error Unsupported VEC_SIZE! +@@ -262,28 +279,18 @@ L(stosb_more_2x_vec): + /* Fallthrough goes to L(loop_4x_vec). Tests for memset (2x, 4x] + and (4x, 8x] jump to target. */ + L(more_2x_vec): +- +- /* Two different methods of setting up pointers / compare. The +- two methods are based on the fact that EVEX/AVX512 mov +- instructions take more bytes then AVX2/SSE2 mov instructions. As +- well that EVEX/AVX512 machines also have fast LEA_BID. Both +- setup and END_REG to avoid complex address mode. For EVEX/AVX512 +- this saves code size and keeps a few targets in one fetch block. +- For AVX2/SSE2 this helps prevent AGU bottlenecks. */ +-#if defined USE_WITH_EVEX || defined USE_WITH_AVX512 +- /* If EVEX/AVX512 compute END_REG - (VEC_SIZE * 4 + +- LOOP_4X_OFFSET) with LEA_BID. */ +- +- /* END_REG is rcx for EVEX/AVX512. */ +- leaq -(VEC_SIZE * 4 + LOOP_4X_OFFSET)(%rdi, %rdx), %END_REG +-#endif +- +- /* Stores to first 2x VEC before cmp as any path forward will +- require it. */ +- VMOVU %VEC(0), (%rax) +- VMOVU %VEC(0), VEC_SIZE(%rax) ++ /* Store next 2x vec regardless. */ ++ VMOVU %VEC(0), (%rdi) ++ VMOVU %VEC(0), (VEC_SIZE * 1)(%rdi) + + ++ /* Two different methods of setting up pointers / compare. The two ++ methods are based on the fact that EVEX/AVX512 mov instructions take ++ more bytes then AVX2/SSE2 mov instructions. As well that EVEX/AVX512 ++ machines also have fast LEA_BID. Both setup and END_REG to avoid complex ++ address mode. For EVEX/AVX512 this saves code size and keeps a few ++ targets in one fetch block. For AVX2/SSE2 this helps prevent AGU ++ bottlenecks. */ + #if !(defined USE_WITH_EVEX || defined USE_WITH_AVX512) + /* If AVX2/SSE2 compute END_REG (rdi) with ALU. */ + addq %rdx, %END_REG +@@ -292,6 +299,15 @@ L(more_2x_vec): + cmpq $(VEC_SIZE * 4), %rdx + jbe L(last_2x_vec) + ++ ++#if defined USE_WITH_EVEX || defined USE_WITH_AVX512 ++ /* If EVEX/AVX512 compute END_REG - (VEC_SIZE * 4 + LOOP_4X_OFFSET) with ++ LEA_BID. */ ++ ++ /* END_REG is rcx for EVEX/AVX512. */ ++ leaq -(VEC_SIZE * 4 + LOOP_4X_OFFSET)(%rdi, %rdx), %END_REG ++#endif ++ + /* Store next 2x vec regardless. */ + VMOVU %VEC(0), (VEC_SIZE * 2)(%rax) + VMOVU %VEC(0), (VEC_SIZE * 3)(%rax) +@@ -355,65 +371,93 @@ L(stosb_local): + /* Define L(less_vec) only if not otherwise defined. */ + .p2align 4 + L(less_vec): ++ /* Broadcast esi to partial register (i.e VEC_SIZE == 32 broadcast to ++ xmm). This is only does anything for AVX2. 
*/ ++ MEMSET_VDUP_TO_VEC0_LOW () ++L(less_vec_no_vdup): + #endif + L(cross_page): + #if VEC_SIZE > 32 + cmpl $32, %edx +- jae L(between_32_63) ++ jge L(between_32_63) + #endif + #if VEC_SIZE > 16 + cmpl $16, %edx +- jae L(between_16_31) ++ jge L(between_16_31) ++#endif ++#ifndef USE_XMM_LESS_VEC ++ MOVQ %XMM0, %rcx + #endif +- MOVQ %XMM0, %rdi + cmpl $8, %edx +- jae L(between_8_15) ++ jge L(between_8_15) + cmpl $4, %edx +- jae L(between_4_7) ++ jge L(between_4_7) + cmpl $1, %edx +- ja L(between_2_3) +- jb L(return) +- movb %sil, (%rax) +- VZEROUPPER_RETURN ++ jg L(between_2_3) ++ jl L(between_0_0) ++ movb %sil, (%LESS_VEC_REG) ++L(between_0_0): ++ ret + +- /* Align small targets only if not doing so would cross a fetch +- line. */ ++ /* Align small targets only if not doing so would cross a fetch line. ++ */ + #if VEC_SIZE > 32 + .p2align 4,, SMALL_MEMSET_ALIGN(MOV_SIZE, RET_SIZE) + /* From 32 to 63. No branch when size == 32. */ + L(between_32_63): +- VMOVU %YMM0, (%rax) +- VMOVU %YMM0, -32(%rax, %rdx) ++ VMOVU %YMM0, (%LESS_VEC_REG) ++ VMOVU %YMM0, -32(%LESS_VEC_REG, %rdx) + VZEROUPPER_RETURN + #endif + + #if VEC_SIZE >= 32 +- .p2align 4,, SMALL_MEMSET_ALIGN(MOV_SIZE, RET_SIZE) ++ .p2align 4,, SMALL_MEMSET_ALIGN(MOV_SIZE, 1) + L(between_16_31): + /* From 16 to 31. No branch when size == 16. */ +- VMOVU %XMM0, (%rax) +- VMOVU %XMM0, -16(%rax, %rdx) +- VZEROUPPER_RETURN ++ VMOVU %XMM0, (%LESS_VEC_REG) ++ VMOVU %XMM0, -16(%LESS_VEC_REG, %rdx) ++ ret + #endif + +- .p2align 4,, SMALL_MEMSET_ALIGN(3, RET_SIZE) ++ /* Move size is 3 for SSE2, EVEX, and AVX512. Move size is 4 for AVX2. ++ */ ++ .p2align 4,, SMALL_MEMSET_ALIGN(3 + XMM_SMALL, 1) + L(between_8_15): + /* From 8 to 15. No branch when size == 8. */ +- movq %rdi, (%rax) +- movq %rdi, -8(%rax, %rdx) +- VZEROUPPER_RETURN ++#ifdef USE_XMM_LESS_VEC ++ MOVQ %XMM0, (%rdi) ++ MOVQ %XMM0, -8(%rdi, %rdx) ++#else ++ movq %rcx, (%LESS_VEC_REG) ++ movq %rcx, -8(%LESS_VEC_REG, %rdx) ++#endif ++ ret + +- .p2align 4,, SMALL_MEMSET_ALIGN(2, RET_SIZE) ++ /* Move size is 2 for SSE2, EVEX, and AVX512. Move size is 4 for AVX2. ++ */ ++ .p2align 4,, SMALL_MEMSET_ALIGN(2 << XMM_SMALL, 1) + L(between_4_7): + /* From 4 to 7. No branch when size == 4. */ +- movl %edi, (%rax) +- movl %edi, -4(%rax, %rdx) +- VZEROUPPER_RETURN ++#ifdef USE_XMM_LESS_VEC ++ MOVD %XMM0, (%rdi) ++ MOVD %XMM0, -4(%rdi, %rdx) ++#else ++ movl %ecx, (%LESS_VEC_REG) ++ movl %ecx, -4(%LESS_VEC_REG, %rdx) ++#endif ++ ret + +- .p2align 4,, SMALL_MEMSET_ALIGN(3, RET_SIZE) ++ /* 4 * XMM_SMALL for the third mov for AVX2. */ ++ .p2align 4,, 4 * XMM_SMALL + SMALL_MEMSET_ALIGN(3, 1) + L(between_2_3): + /* From 2 to 3. No branch when size == 2. 
*/ +- movw %di, (%rax) +- movb %dil, -1(%rax, %rdx) +- VZEROUPPER_RETURN ++#ifdef USE_XMM_LESS_VEC ++ movb %sil, (%rdi) ++ movb %sil, 1(%rdi) ++ movb %sil, -1(%rdi, %rdx) ++#else ++ movw %cx, (%LESS_VEC_REG) ++ movb %sil, -1(%LESS_VEC_REG, %rdx) ++#endif ++ ret + END (MEMSET_SYMBOL (__memset, unaligned_erms)) diff --git a/SOURCES/glibc-upstream-2.34-206.patch b/SOURCES/glibc-upstream-2.34-206.patch new file mode 100644 index 0000000..ed9f37b --- /dev/null +++ b/SOURCES/glibc-upstream-2.34-206.patch @@ -0,0 +1,35 @@ +commit 190ea5f7e4e7e98b9b6e3f29835ae8b1f6a5442e +Author: Noah Goldstein +Date: Mon Feb 7 00:32:23 2022 -0600 + + x86: Remove SSSE3 instruction for broadcast in memset.S (SSE2 Only) + + commit b62ace2740a106222e124cc86956448fa07abf4d + Author: Noah Goldstein + Date: Sun Feb 6 00:54:18 2022 -0600 + + x86: Improve vec generation in memset-vec-unaligned-erms.S + + Revert usage of 'pshufb' in broadcast logic as it is an SSSE3 + instruction and memset.S is restricted to only SSE2 instructions. + + (cherry picked from commit 1b0c60f95bbe2eded80b2bb5be75c0e45b11cde1) + +diff --git a/sysdeps/x86_64/memset.S b/sysdeps/x86_64/memset.S +index 34ee0bfdcb81fb39..954471e5a5bf225b 100644 +--- a/sysdeps/x86_64/memset.S ++++ b/sysdeps/x86_64/memset.S +@@ -30,9 +30,10 @@ + + # define MEMSET_SET_VEC0_AND_SET_RETURN(d, r) \ + movd d, %xmm0; \ +- pxor %xmm1, %xmm1; \ +- pshufb %xmm1, %xmm0; \ +- movq r, %rax ++ movq r, %rax; \ ++ punpcklbw %xmm0, %xmm0; \ ++ punpcklwd %xmm0, %xmm0; \ ++ pshufd $0, %xmm0, %xmm0 + + # define WMEMSET_SET_VEC0_AND_SET_RETURN(d, r) \ + movd d, %xmm0; \ diff --git a/SOURCES/glibc-upstream-2.34-207.patch b/SOURCES/glibc-upstream-2.34-207.patch new file mode 100644 index 0000000..9818f5d --- /dev/null +++ b/SOURCES/glibc-upstream-2.34-207.patch @@ -0,0 +1,719 @@ +commit 5cb6329652696e79d6d576165ea87e332c9de106 +Author: H.J. Lu +Date: Mon Feb 7 05:55:15 2022 -0800 + + x86-64: Optimize bzero + + memset with zero as the value to set is by far the majority value (99%+ + for Python3 and GCC). + + bzero can be slightly more optimized for this case by using a zero-idiom + xor for broadcasting the set value to a register (vector or GPR). 
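
[Editor's note: a minimal, hypothetical C/AVX2 sketch of the zero-idiom
point made above, inserted for illustration only. It is not part of the
quoted commit or of glibc -- the real routine is the hand-written assembly
in the diff below -- and the function names here are invented. Compiling
it assumes -mavx2.]

/* Sketch: why bzero can skip the broadcast that memset needs.
   Illustrative only; tails, alignment and the ERMS path of the real
   routine are omitted.  */
#include <immintrin.h>
#include <stddef.h>

/* memset path: the fill byte must first be materialized in a vector
   register (vmovd + vpbroadcastb), a real dependency chain.  */
static inline __m256i
fill_vec (int c)
{
  return _mm256_set1_epi8 ((char) c);
}

/* bzero path: vpxor ymm, ymm, ymm is a zero idiom that modern x86
   cores eliminate at register rename, so no broadcast chain at all.  */
static inline __m256i
zero_vec (void)
{
  return _mm256_setzero_si256 ();
}

void
bzero_sketch (void *dst, size_t n)
{
  unsigned char *p = dst;
  __m256i z = zero_vec ();
  size_t i;
  for (i = 0; i + 32 <= n; i += 32)
    _mm256_storeu_si256 ((__m256i *) (p + i), z);
  for (; i < n; i++)
    p[i] = 0;
}

[The sketch shows the trade-off the commit message describes: memset must
broadcast the fill value before its store loop can start, while bzero
reaches the store loop through a single dependency-breaking xor.]
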
+ + Co-developed-by: Noah Goldstein + (cherry picked from commit 3d9f171bfb5325bd5f427e9fc386453358c6e840) + +diff --git a/sysdeps/x86_64/memset.S b/sysdeps/x86_64/memset.S +index 954471e5a5bf225b..0358210c7ff3a976 100644 +--- a/sysdeps/x86_64/memset.S ++++ b/sysdeps/x86_64/memset.S +@@ -35,6 +35,9 @@ + punpcklwd %xmm0, %xmm0; \ + pshufd $0, %xmm0, %xmm0 + ++# define BZERO_ZERO_VEC0() \ ++ pxor %xmm0, %xmm0 ++ + # define WMEMSET_SET_VEC0_AND_SET_RETURN(d, r) \ + movd d, %xmm0; \ + pshufd $0, %xmm0, %xmm0; \ +@@ -53,6 +56,10 @@ + # define MEMSET_SYMBOL(p,s) memset + #endif + ++#ifndef BZERO_SYMBOL ++# define BZERO_SYMBOL(p,s) __bzero ++#endif ++ + #ifndef WMEMSET_SYMBOL + # define WMEMSET_CHK_SYMBOL(p,s) p + # define WMEMSET_SYMBOL(p,s) __wmemset +@@ -63,6 +70,7 @@ + libc_hidden_builtin_def (memset) + + #if IS_IN (libc) ++weak_alias (__bzero, bzero) + libc_hidden_def (__wmemset) + weak_alias (__wmemset, wmemset) + libc_hidden_weak (wmemset) +diff --git a/sysdeps/x86_64/multiarch/Makefile b/sysdeps/x86_64/multiarch/Makefile +index 26be40959ce62895..37d8d6f0bd2d10cc 100644 +--- a/sysdeps/x86_64/multiarch/Makefile ++++ b/sysdeps/x86_64/multiarch/Makefile +@@ -1,85 +1,130 @@ + ifeq ($(subdir),string) + +-sysdep_routines += strncat-c stpncpy-c strncpy-c \ +- strcmp-sse2 strcmp-sse2-unaligned strcmp-ssse3 \ +- strcmp-sse4_2 strcmp-avx2 \ +- strncmp-sse2 strncmp-ssse3 strncmp-sse4_2 strncmp-avx2 \ +- memchr-sse2 rawmemchr-sse2 memchr-avx2 rawmemchr-avx2 \ +- memrchr-sse2 memrchr-avx2 \ +- memcmp-sse2 \ +- memcmp-avx2-movbe \ +- memcmp-sse4 memcpy-ssse3 \ +- memmove-ssse3 \ +- memcpy-ssse3-back \ +- memmove-ssse3-back \ +- memmove-avx512-no-vzeroupper \ +- strcasecmp_l-sse2 strcasecmp_l-ssse3 \ +- strcasecmp_l-sse4_2 strcasecmp_l-avx \ +- strncase_l-sse2 strncase_l-ssse3 \ +- strncase_l-sse4_2 strncase_l-avx \ +- strchr-sse2 strchrnul-sse2 strchr-avx2 strchrnul-avx2 \ +- strrchr-sse2 strrchr-avx2 \ +- strlen-sse2 strnlen-sse2 strlen-avx2 strnlen-avx2 \ +- strcat-avx2 strncat-avx2 \ +- strcat-ssse3 strncat-ssse3\ +- strcpy-avx2 strncpy-avx2 \ +- strcpy-sse2 stpcpy-sse2 \ +- strcpy-ssse3 strncpy-ssse3 stpcpy-ssse3 stpncpy-ssse3 \ +- strcpy-sse2-unaligned strncpy-sse2-unaligned \ +- stpcpy-sse2-unaligned stpncpy-sse2-unaligned \ +- stpcpy-avx2 stpncpy-avx2 \ +- strcat-sse2 \ +- strcat-sse2-unaligned strncat-sse2-unaligned \ +- strchr-sse2-no-bsf memcmp-ssse3 strstr-sse2-unaligned \ +- strcspn-sse2 strpbrk-sse2 strspn-sse2 \ +- strcspn-c strpbrk-c strspn-c varshift \ +- memset-avx512-no-vzeroupper \ +- memmove-sse2-unaligned-erms \ +- memmove-avx-unaligned-erms \ +- memmove-avx512-unaligned-erms \ +- memset-sse2-unaligned-erms \ +- memset-avx2-unaligned-erms \ +- memset-avx512-unaligned-erms \ +- memchr-avx2-rtm \ +- memcmp-avx2-movbe-rtm \ +- memmove-avx-unaligned-erms-rtm \ +- memrchr-avx2-rtm \ +- memset-avx2-unaligned-erms-rtm \ +- rawmemchr-avx2-rtm \ +- strchr-avx2-rtm \ +- strcmp-avx2-rtm \ +- strchrnul-avx2-rtm \ +- stpcpy-avx2-rtm \ +- stpncpy-avx2-rtm \ +- strcat-avx2-rtm \ +- strcpy-avx2-rtm \ +- strlen-avx2-rtm \ +- strncat-avx2-rtm \ +- strncmp-avx2-rtm \ +- strncpy-avx2-rtm \ +- strnlen-avx2-rtm \ +- strrchr-avx2-rtm \ +- memchr-evex \ +- memcmp-evex-movbe \ +- memmove-evex-unaligned-erms \ +- memrchr-evex \ +- memset-evex-unaligned-erms \ +- rawmemchr-evex \ +- stpcpy-evex \ +- stpncpy-evex \ +- strcat-evex \ +- strchr-evex \ +- strchrnul-evex \ +- strcmp-evex \ +- strcpy-evex \ +- strlen-evex \ +- strncat-evex \ +- strncmp-evex \ +- strncpy-evex \ +- strnlen-evex \ +- strrchr-evex \ +- 
memchr-evex-rtm \ +- rawmemchr-evex-rtm ++sysdep_routines += \ ++ bzero \ ++ memchr-avx2 \ ++ memchr-avx2-rtm \ ++ memchr-evex \ ++ memchr-evex-rtm \ ++ memchr-sse2 \ ++ memcmp-avx2-movbe \ ++ memcmp-avx2-movbe-rtm \ ++ memcmp-evex-movbe \ ++ memcmp-sse2 \ ++ memcmp-sse4 \ ++ memcmp-ssse3 \ ++ memcpy-ssse3 \ ++ memcpy-ssse3-back \ ++ memmove-avx-unaligned-erms \ ++ memmove-avx-unaligned-erms-rtm \ ++ memmove-avx512-no-vzeroupper \ ++ memmove-avx512-unaligned-erms \ ++ memmove-evex-unaligned-erms \ ++ memmove-sse2-unaligned-erms \ ++ memmove-ssse3 \ ++ memmove-ssse3-back \ ++ memrchr-avx2 \ ++ memrchr-avx2-rtm \ ++ memrchr-evex \ ++ memrchr-sse2 \ ++ memset-avx2-unaligned-erms \ ++ memset-avx2-unaligned-erms-rtm \ ++ memset-avx512-no-vzeroupper \ ++ memset-avx512-unaligned-erms \ ++ memset-evex-unaligned-erms \ ++ memset-sse2-unaligned-erms \ ++ rawmemchr-avx2 \ ++ rawmemchr-avx2-rtm \ ++ rawmemchr-evex \ ++ rawmemchr-evex-rtm \ ++ rawmemchr-sse2 \ ++ stpcpy-avx2 \ ++ stpcpy-avx2-rtm \ ++ stpcpy-evex \ ++ stpcpy-sse2 \ ++ stpcpy-sse2-unaligned \ ++ stpcpy-ssse3 \ ++ stpncpy-avx2 \ ++ stpncpy-avx2-rtm \ ++ stpncpy-c \ ++ stpncpy-evex \ ++ stpncpy-sse2-unaligned \ ++ stpncpy-ssse3 \ ++ strcasecmp_l-avx \ ++ strcasecmp_l-sse2 \ ++ strcasecmp_l-sse4_2 \ ++ strcasecmp_l-ssse3 \ ++ strcat-avx2 \ ++ strcat-avx2-rtm \ ++ strcat-evex \ ++ strcat-sse2 \ ++ strcat-sse2-unaligned \ ++ strcat-ssse3 \ ++ strchr-avx2 \ ++ strchr-avx2-rtm \ ++ strchr-evex \ ++ strchr-sse2 \ ++ strchr-sse2-no-bsf \ ++ strchrnul-avx2 \ ++ strchrnul-avx2-rtm \ ++ strchrnul-evex \ ++ strchrnul-sse2 \ ++ strcmp-avx2 \ ++ strcmp-avx2-rtm \ ++ strcmp-evex \ ++ strcmp-sse2 \ ++ strcmp-sse2-unaligned \ ++ strcmp-sse4_2 \ ++ strcmp-ssse3 \ ++ strcpy-avx2 \ ++ strcpy-avx2-rtm \ ++ strcpy-evex \ ++ strcpy-sse2 \ ++ strcpy-sse2-unaligned \ ++ strcpy-ssse3 \ ++ strcspn-c \ ++ strcspn-sse2 \ ++ strlen-avx2 \ ++ strlen-avx2-rtm \ ++ strlen-evex \ ++ strlen-sse2 \ ++ strncase_l-avx \ ++ strncase_l-sse2 \ ++ strncase_l-sse4_2 \ ++ strncase_l-ssse3 \ ++ strncat-avx2 \ ++ strncat-avx2-rtm \ ++ strncat-c \ ++ strncat-evex \ ++ strncat-sse2-unaligned \ ++ strncat-ssse3 \ ++ strncmp-avx2 \ ++ strncmp-avx2-rtm \ ++ strncmp-evex \ ++ strncmp-sse2 \ ++ strncmp-sse4_2 \ ++ strncmp-ssse3 \ ++ strncpy-avx2 \ ++ strncpy-avx2-rtm \ ++ strncpy-c \ ++ strncpy-evex \ ++ strncpy-sse2-unaligned \ ++ strncpy-ssse3 \ ++ strnlen-avx2 \ ++ strnlen-avx2-rtm \ ++ strnlen-evex \ ++ strnlen-sse2 \ ++ strpbrk-c \ ++ strpbrk-sse2 \ ++ strrchr-avx2 \ ++ strrchr-avx2-rtm \ ++ strrchr-evex \ ++ strrchr-sse2 \ ++ strspn-c \ ++ strspn-sse2 \ ++ strstr-sse2-unaligned \ ++ varshift \ ++# sysdep_routines + CFLAGS-varshift.c += -msse4 + CFLAGS-strcspn-c.c += -msse4 + CFLAGS-strpbrk-c.c += -msse4 +diff --git a/sysdeps/x86_64/multiarch/bzero.c b/sysdeps/x86_64/multiarch/bzero.c +new file mode 100644 +index 0000000000000000..13e399a9a1fbdeb2 +--- /dev/null ++++ b/sysdeps/x86_64/multiarch/bzero.c +@@ -0,0 +1,108 @@ ++/* Multiple versions of bzero. ++ All versions must be listed in ifunc-impl-list.c. ++ Copyright (C) 2022 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. 
++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++/* Define multiple versions only for the definition in libc. */ ++#if IS_IN (libc) ++# define __bzero __redirect___bzero ++# include ++# undef __bzero ++ ++/* OPTIMIZE1 definition required for bzero patch. */ ++# define OPTIMIZE1(name) EVALUATOR1 (SYMBOL_NAME, name) ++# define SYMBOL_NAME __bzero ++# include ++ ++extern __typeof (REDIRECT_NAME) OPTIMIZE1 (sse2_unaligned) ++ attribute_hidden; ++extern __typeof (REDIRECT_NAME) OPTIMIZE1 (sse2_unaligned_erms) ++ attribute_hidden; ++extern __typeof (REDIRECT_NAME) OPTIMIZE1 (avx2_unaligned) attribute_hidden; ++extern __typeof (REDIRECT_NAME) OPTIMIZE1 (avx2_unaligned_erms) ++ attribute_hidden; ++extern __typeof (REDIRECT_NAME) OPTIMIZE1 (avx2_unaligned_rtm) ++ attribute_hidden; ++extern __typeof (REDIRECT_NAME) OPTIMIZE1 (avx2_unaligned_erms_rtm) ++ attribute_hidden; ++extern __typeof (REDIRECT_NAME) OPTIMIZE1 (evex_unaligned) ++ attribute_hidden; ++extern __typeof (REDIRECT_NAME) OPTIMIZE1 (evex_unaligned_erms) ++ attribute_hidden; ++extern __typeof (REDIRECT_NAME) OPTIMIZE1 (avx512_unaligned) ++ attribute_hidden; ++extern __typeof (REDIRECT_NAME) OPTIMIZE1 (avx512_unaligned_erms) ++ attribute_hidden; ++ ++static inline void * ++IFUNC_SELECTOR (void) ++{ ++ const struct cpu_features* cpu_features = __get_cpu_features (); ++ ++ if (CPU_FEATURE_USABLE_P (cpu_features, AVX512F) ++ && !CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_AVX512)) ++ { ++ if (CPU_FEATURE_USABLE_P (cpu_features, AVX512VL) ++ && CPU_FEATURE_USABLE_P (cpu_features, AVX512BW) ++ && CPU_FEATURE_USABLE_P (cpu_features, BMI2)) ++ { ++ if (CPU_FEATURE_USABLE_P (cpu_features, ERMS)) ++ return OPTIMIZE1 (avx512_unaligned_erms); ++ ++ return OPTIMIZE1 (avx512_unaligned); ++ } ++ } ++ ++ if (CPU_FEATURE_USABLE_P (cpu_features, AVX2)) ++ { ++ if (CPU_FEATURE_USABLE_P (cpu_features, AVX512VL) ++ && CPU_FEATURE_USABLE_P (cpu_features, AVX512BW) ++ && CPU_FEATURE_USABLE_P (cpu_features, BMI2)) ++ { ++ if (CPU_FEATURE_USABLE_P (cpu_features, ERMS)) ++ return OPTIMIZE1 (evex_unaligned_erms); ++ ++ return OPTIMIZE1 (evex_unaligned); ++ } ++ ++ if (CPU_FEATURE_USABLE_P (cpu_features, RTM)) ++ { ++ if (CPU_FEATURE_USABLE_P (cpu_features, ERMS)) ++ return OPTIMIZE1 (avx2_unaligned_erms_rtm); ++ ++ return OPTIMIZE1 (avx2_unaligned_rtm); ++ } ++ ++ if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER)) ++ { ++ if (CPU_FEATURE_USABLE_P (cpu_features, ERMS)) ++ return OPTIMIZE1 (avx2_unaligned_erms); ++ ++ return OPTIMIZE1 (avx2_unaligned); ++ } ++ } ++ ++ if (CPU_FEATURE_USABLE_P (cpu_features, ERMS)) ++ return OPTIMIZE1 (sse2_unaligned_erms); ++ ++ return OPTIMIZE1 (sse2_unaligned); ++} ++ ++libc_ifunc_redirected (__redirect___bzero, __bzero, IFUNC_SELECTOR ()); ++ ++weak_alias (__bzero, bzero) ++#endif +diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c +index 39ab10613bb0ffea..4992d7bd3206a7c0 100644 +--- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c ++++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c +@@ -282,6 +282,48 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, + __memset_avx512_no_vzeroupper) + ) + ++ /* Support 
sysdeps/x86_64/multiarch/bzero.c. */ ++ IFUNC_IMPL (i, name, bzero, ++ IFUNC_IMPL_ADD (array, i, bzero, 1, ++ __bzero_sse2_unaligned) ++ IFUNC_IMPL_ADD (array, i, bzero, 1, ++ __bzero_sse2_unaligned_erms) ++ IFUNC_IMPL_ADD (array, i, bzero, ++ CPU_FEATURE_USABLE (AVX2), ++ __bzero_avx2_unaligned) ++ IFUNC_IMPL_ADD (array, i, bzero, ++ CPU_FEATURE_USABLE (AVX2), ++ __bzero_avx2_unaligned_erms) ++ IFUNC_IMPL_ADD (array, i, bzero, ++ (CPU_FEATURE_USABLE (AVX2) ++ && CPU_FEATURE_USABLE (RTM)), ++ __bzero_avx2_unaligned_rtm) ++ IFUNC_IMPL_ADD (array, i, bzero, ++ (CPU_FEATURE_USABLE (AVX2) ++ && CPU_FEATURE_USABLE (RTM)), ++ __bzero_avx2_unaligned_erms_rtm) ++ IFUNC_IMPL_ADD (array, i, bzero, ++ (CPU_FEATURE_USABLE (AVX512VL) ++ && CPU_FEATURE_USABLE (AVX512BW) ++ && CPU_FEATURE_USABLE (BMI2)), ++ __bzero_evex_unaligned) ++ IFUNC_IMPL_ADD (array, i, bzero, ++ (CPU_FEATURE_USABLE (AVX512VL) ++ && CPU_FEATURE_USABLE (AVX512BW) ++ && CPU_FEATURE_USABLE (BMI2)), ++ __bzero_evex_unaligned_erms) ++ IFUNC_IMPL_ADD (array, i, bzero, ++ (CPU_FEATURE_USABLE (AVX512VL) ++ && CPU_FEATURE_USABLE (AVX512BW) ++ && CPU_FEATURE_USABLE (BMI2)), ++ __bzero_avx512_unaligned_erms) ++ IFUNC_IMPL_ADD (array, i, bzero, ++ (CPU_FEATURE_USABLE (AVX512VL) ++ && CPU_FEATURE_USABLE (AVX512BW) ++ && CPU_FEATURE_USABLE (BMI2)), ++ __bzero_avx512_unaligned) ++ ) ++ + /* Support sysdeps/x86_64/multiarch/rawmemchr.c. */ + IFUNC_IMPL (i, name, rawmemchr, + IFUNC_IMPL_ADD (array, i, rawmemchr, +diff --git a/sysdeps/x86_64/multiarch/memset-avx2-unaligned-erms-rtm.S b/sysdeps/x86_64/multiarch/memset-avx2-unaligned-erms-rtm.S +index 8ac3e479bba488be..5a5ee6f67299400b 100644 +--- a/sysdeps/x86_64/multiarch/memset-avx2-unaligned-erms-rtm.S ++++ b/sysdeps/x86_64/multiarch/memset-avx2-unaligned-erms-rtm.S +@@ -5,6 +5,7 @@ + + #define SECTION(p) p##.avx.rtm + #define MEMSET_SYMBOL(p,s) p##_avx2_##s##_rtm ++#define BZERO_SYMBOL(p,s) p##_avx2_##s##_rtm + #define WMEMSET_SYMBOL(p,s) p##_avx2_##s##_rtm + + #include "memset-avx2-unaligned-erms.S" +diff --git a/sysdeps/x86_64/multiarch/memset-avx2-unaligned-erms.S b/sysdeps/x86_64/multiarch/memset-avx2-unaligned-erms.S +index c0bf2875d03d51ab..a093a2831f3dfa0d 100644 +--- a/sysdeps/x86_64/multiarch/memset-avx2-unaligned-erms.S ++++ b/sysdeps/x86_64/multiarch/memset-avx2-unaligned-erms.S +@@ -14,6 +14,9 @@ + vmovd d, %xmm0; \ + movq r, %rax; + ++# define BZERO_ZERO_VEC0() \ ++ vpxor %xmm0, %xmm0, %xmm0 ++ + # define WMEMSET_SET_VEC0_AND_SET_RETURN(d, r) \ + MEMSET_SET_VEC0_AND_SET_RETURN(d, r) + +@@ -29,6 +32,9 @@ + # ifndef MEMSET_SYMBOL + # define MEMSET_SYMBOL(p,s) p##_avx2_##s + # endif ++# ifndef BZERO_SYMBOL ++# define BZERO_SYMBOL(p,s) p##_avx2_##s ++# endif + # ifndef WMEMSET_SYMBOL + # define WMEMSET_SYMBOL(p,s) p##_avx2_##s + # endif +diff --git a/sysdeps/x86_64/multiarch/memset-avx512-unaligned-erms.S b/sysdeps/x86_64/multiarch/memset-avx512-unaligned-erms.S +index 5241216a77bf72b7..727c92133a15900f 100644 +--- a/sysdeps/x86_64/multiarch/memset-avx512-unaligned-erms.S ++++ b/sysdeps/x86_64/multiarch/memset-avx512-unaligned-erms.S +@@ -19,6 +19,9 @@ + vpbroadcastb d, %VEC0; \ + movq r, %rax + ++# define BZERO_ZERO_VEC0() \ ++ vpxorq %XMM0, %XMM0, %XMM0 ++ + # define WMEMSET_SET_VEC0_AND_SET_RETURN(d, r) \ + vpbroadcastd d, %VEC0; \ + movq r, %rax +diff --git a/sysdeps/x86_64/multiarch/memset-evex-unaligned-erms.S b/sysdeps/x86_64/multiarch/memset-evex-unaligned-erms.S +index 637002150659123c..5d8fa78f05476b10 100644 +--- a/sysdeps/x86_64/multiarch/memset-evex-unaligned-erms.S ++++ 
b/sysdeps/x86_64/multiarch/memset-evex-unaligned-erms.S +@@ -19,6 +19,9 @@ + vpbroadcastb d, %VEC0; \ + movq r, %rax + ++# define BZERO_ZERO_VEC0() \ ++ vpxorq %XMM0, %XMM0, %XMM0 ++ + # define WMEMSET_SET_VEC0_AND_SET_RETURN(d, r) \ + vpbroadcastd d, %VEC0; \ + movq r, %rax +diff --git a/sysdeps/x86_64/multiarch/memset-sse2-unaligned-erms.S b/sysdeps/x86_64/multiarch/memset-sse2-unaligned-erms.S +index e4e95fc19fe48d2d..bac74ac37fd3c144 100644 +--- a/sysdeps/x86_64/multiarch/memset-sse2-unaligned-erms.S ++++ b/sysdeps/x86_64/multiarch/memset-sse2-unaligned-erms.S +@@ -22,6 +22,7 @@ + + #if IS_IN (libc) + # define MEMSET_SYMBOL(p,s) p##_sse2_##s ++# define BZERO_SYMBOL(p,s) MEMSET_SYMBOL (p, s) + # define WMEMSET_SYMBOL(p,s) p##_sse2_##s + + # ifdef SHARED +diff --git a/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S b/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S +index c8db87dcbf69f0d8..39a096a594ccb5b6 100644 +--- a/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S ++++ b/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S +@@ -26,6 +26,10 @@ + + #include + ++#ifndef BZERO_SYMBOL ++# define BZERO_SYMBOL(p,s) MEMSET_SYMBOL (p, s) ++#endif ++ + #ifndef MEMSET_CHK_SYMBOL + # define MEMSET_CHK_SYMBOL(p,s) MEMSET_SYMBOL(p, s) + #endif +@@ -87,6 +91,18 @@ + # define XMM_SMALL 0 + #endif + ++#ifdef USE_LESS_VEC_MASK_STORE ++# define SET_REG64 rcx ++# define SET_REG32 ecx ++# define SET_REG16 cx ++# define SET_REG8 cl ++#else ++# define SET_REG64 rsi ++# define SET_REG32 esi ++# define SET_REG16 si ++# define SET_REG8 sil ++#endif ++ + #define PAGE_SIZE 4096 + + /* Macro to calculate size of small memset block for aligning +@@ -96,18 +112,6 @@ + + #ifndef SECTION + # error SECTION is not defined! +-#endif +- +- .section SECTION(.text),"ax",@progbits +-#if VEC_SIZE == 16 && IS_IN (libc) +-ENTRY (__bzero) +- mov %RDI_LP, %RAX_LP /* Set return value. */ +- mov %RSI_LP, %RDX_LP /* Set n. */ +- xorl %esi, %esi +- pxor %XMM0, %XMM0 +- jmp L(entry_from_bzero) +-END (__bzero) +-weak_alias (__bzero, bzero) + #endif + + #if IS_IN (libc) +@@ -123,12 +127,37 @@ ENTRY (WMEMSET_SYMBOL (__wmemset, unaligned)) + WMEMSET_SET_VEC0_AND_SET_RETURN (%esi, %rdi) + WMEMSET_VDUP_TO_VEC0_LOW() + cmpq $VEC_SIZE, %rdx +- jb L(less_vec_no_vdup) ++ jb L(less_vec_from_wmemset) + WMEMSET_VDUP_TO_VEC0_HIGH() + jmp L(entry_from_wmemset) + END (WMEMSET_SYMBOL (__wmemset, unaligned)) + #endif + ++ENTRY (BZERO_SYMBOL(__bzero, unaligned)) ++#if VEC_SIZE > 16 ++ BZERO_ZERO_VEC0 () ++#endif ++ mov %RDI_LP, %RAX_LP ++ mov %RSI_LP, %RDX_LP ++#ifndef USE_LESS_VEC_MASK_STORE ++ xorl %esi, %esi ++#endif ++ cmp $VEC_SIZE, %RDX_LP ++ jb L(less_vec_no_vdup) ++#ifdef USE_LESS_VEC_MASK_STORE ++ xorl %esi, %esi ++#endif ++#if VEC_SIZE <= 16 ++ BZERO_ZERO_VEC0 () ++#endif ++ cmp $(VEC_SIZE * 2), %RDX_LP ++ ja L(more_2x_vec) ++ /* From VEC and to 2 * VEC. No branch when size == VEC_SIZE. */ ++ VMOVU %VEC(0), (%rdi) ++ VMOVU %VEC(0), (VEC_SIZE * -1)(%rdi, %rdx) ++ VZEROUPPER_RETURN ++END (BZERO_SYMBOL(__bzero, unaligned)) ++ + #if defined SHARED && IS_IN (libc) + ENTRY_CHK (MEMSET_CHK_SYMBOL (__memset_chk, unaligned)) + cmp %RDX_LP, %RCX_LP +@@ -142,7 +171,6 @@ ENTRY (MEMSET_SYMBOL (__memset, unaligned)) + /* Clear the upper 32 bits. 
*/ + mov %edx, %edx + # endif +-L(entry_from_bzero): + cmpq $VEC_SIZE, %rdx + jb L(less_vec) + MEMSET_VDUP_TO_VEC0_HIGH() +@@ -187,6 +215,31 @@ END (__memset_erms) + END (MEMSET_SYMBOL (__memset, erms)) + # endif + ++ENTRY_P2ALIGN (BZERO_SYMBOL(__bzero, unaligned_erms), 6) ++# if VEC_SIZE > 16 ++ BZERO_ZERO_VEC0 () ++# endif ++ mov %RDI_LP, %RAX_LP ++ mov %RSI_LP, %RDX_LP ++# ifndef USE_LESS_VEC_MASK_STORE ++ xorl %esi, %esi ++# endif ++ cmp $VEC_SIZE, %RDX_LP ++ jb L(less_vec_no_vdup) ++# ifdef USE_LESS_VEC_MASK_STORE ++ xorl %esi, %esi ++# endif ++# if VEC_SIZE <= 16 ++ BZERO_ZERO_VEC0 () ++# endif ++ cmp $(VEC_SIZE * 2), %RDX_LP ++ ja L(stosb_more_2x_vec) ++ /* From VEC and to 2 * VEC. No branch when size == VEC_SIZE. */ ++ VMOVU %VEC(0), (%rdi) ++ VMOVU %VEC(0), (VEC_SIZE * -1)(%rdi, %rdx) ++ VZEROUPPER_RETURN ++END (BZERO_SYMBOL(__bzero, unaligned_erms)) ++ + # if defined SHARED && IS_IN (libc) + ENTRY_CHK (MEMSET_CHK_SYMBOL (__memset_chk, unaligned_erms)) + cmp %RDX_LP, %RCX_LP +@@ -229,6 +282,7 @@ L(last_2x_vec): + .p2align 4,, 10 + L(less_vec): + L(less_vec_no_vdup): ++L(less_vec_from_wmemset): + /* Less than 1 VEC. */ + # if VEC_SIZE != 16 && VEC_SIZE != 32 && VEC_SIZE != 64 + # error Unsupported VEC_SIZE! +@@ -374,8 +428,11 @@ L(less_vec): + /* Broadcast esi to partial register (i.e VEC_SIZE == 32 broadcast to + xmm). This is only does anything for AVX2. */ + MEMSET_VDUP_TO_VEC0_LOW () ++L(less_vec_from_wmemset): ++#if VEC_SIZE > 16 + L(less_vec_no_vdup): + #endif ++#endif + L(cross_page): + #if VEC_SIZE > 32 + cmpl $32, %edx +@@ -386,7 +443,10 @@ L(cross_page): + jge L(between_16_31) + #endif + #ifndef USE_XMM_LESS_VEC +- MOVQ %XMM0, %rcx ++ MOVQ %XMM0, %SET_REG64 ++#endif ++#if VEC_SIZE <= 16 ++L(less_vec_no_vdup): + #endif + cmpl $8, %edx + jge L(between_8_15) +@@ -395,7 +455,7 @@ L(cross_page): + cmpl $1, %edx + jg L(between_2_3) + jl L(between_0_0) +- movb %sil, (%LESS_VEC_REG) ++ movb %SET_REG8, (%LESS_VEC_REG) + L(between_0_0): + ret + +@@ -428,8 +488,8 @@ L(between_8_15): + MOVQ %XMM0, (%rdi) + MOVQ %XMM0, -8(%rdi, %rdx) + #else +- movq %rcx, (%LESS_VEC_REG) +- movq %rcx, -8(%LESS_VEC_REG, %rdx) ++ movq %SET_REG64, (%LESS_VEC_REG) ++ movq %SET_REG64, -8(%LESS_VEC_REG, %rdx) + #endif + ret + +@@ -442,8 +502,8 @@ L(between_4_7): + MOVD %XMM0, (%rdi) + MOVD %XMM0, -4(%rdi, %rdx) + #else +- movl %ecx, (%LESS_VEC_REG) +- movl %ecx, -4(%LESS_VEC_REG, %rdx) ++ movl %SET_REG32, (%LESS_VEC_REG) ++ movl %SET_REG32, -4(%LESS_VEC_REG, %rdx) + #endif + ret + +@@ -452,12 +512,12 @@ L(between_4_7): + L(between_2_3): + /* From 2 to 3. No branch when size == 2. */ + #ifdef USE_XMM_LESS_VEC +- movb %sil, (%rdi) +- movb %sil, 1(%rdi) +- movb %sil, -1(%rdi, %rdx) ++ movb %SET_REG8, (%rdi) ++ movb %SET_REG8, 1(%rdi) ++ movb %SET_REG8, -1(%rdi, %rdx) + #else +- movw %cx, (%LESS_VEC_REG) +- movb %sil, -1(%LESS_VEC_REG, %rdx) ++ movw %SET_REG16, (%LESS_VEC_REG) ++ movb %SET_REG8, -1(%LESS_VEC_REG, %rdx) + #endif + ret + END (MEMSET_SYMBOL (__memset, unaligned_erms)) diff --git a/SOURCES/glibc-upstream-2.34-208.patch b/SOURCES/glibc-upstream-2.34-208.patch new file mode 100644 index 0000000..d4d9b52 --- /dev/null +++ b/SOURCES/glibc-upstream-2.34-208.patch @@ -0,0 +1,29 @@ +commit 70509f9b4807295b2b4b43bffe110580fc0381ef +Author: Noah Goldstein +Date: Sat Feb 12 00:45:00 2022 -0600 + + x86: Set .text section in memset-vec-unaligned-erms + + commit 3d9f171bfb5325bd5f427e9fc386453358c6e840 + Author: H.J. 
Lu + Date: Mon Feb 7 05:55:15 2022 -0800 + + x86-64: Optimize bzero + + Remove setting the .text section for the code. This commit + adds that back. + + (cherry picked from commit 7912236f4a597deb092650ca79f33504ddb4af28) + +diff --git a/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S b/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S +index 39a096a594ccb5b6..d9c577fb5ff9700f 100644 +--- a/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S ++++ b/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S +@@ -114,6 +114,7 @@ + # error SECTION is not defined! + #endif + ++ .section SECTION(.text), "ax", @progbits + #if IS_IN (libc) + # if defined SHARED + ENTRY_CHK (WMEMSET_CHK_SYMBOL (__wmemset_chk, unaligned)) diff --git a/SOURCES/glibc-upstream-2.34-209.patch b/SOURCES/glibc-upstream-2.34-209.patch new file mode 100644 index 0000000..4874143 --- /dev/null +++ b/SOURCES/glibc-upstream-2.34-209.patch @@ -0,0 +1,76 @@ +commit 5373c90f2ea3c3fa9931a684c9b81c648dfbe8d7 +Author: Noah Goldstein +Date: Tue Feb 15 20:27:21 2022 -0600 + + x86: Fix bug in strncmp-evex and strncmp-avx2 [BZ #28895] + + Logic can read before the start of `s1` / `s2` if both `s1` and `s2` + are near the start of a page. To avoid having the result contimated by + these comparisons the `strcmp` variants would mask off these + comparisons. This was missing in the `strncmp` variants causing + the bug. This commit adds the masking to `strncmp` so that out of + range comparisons don't affect the result. + + test-strcmp, test-strncmp, test-wcscmp, and test-wcsncmp all pass as + well a full xcheck on x86_64 linux. + Reviewed-by: H.J. Lu + + (cherry picked from commit e108c02a5e23c8c88ce66d8705d4a24bb6b9a8bf) + +diff --git a/string/test-strncmp.c b/string/test-strncmp.c +index 97e831d88fd24316..56e23670ae7f90e4 100644 +--- a/string/test-strncmp.c ++++ b/string/test-strncmp.c +@@ -438,13 +438,23 @@ check3 (void) + static void + check4 (void) + { +- const CHAR *s1 = L ("abc"); +- CHAR *s2 = STRDUP (s1); ++ /* To trigger bug 28895; We need 1) both s1 and s2 to be within 32 bytes of ++ the end of the page. 2) For there to be no mismatch/null byte before the ++ first page cross. 3) For length (`n`) to be large enough for one string to ++ cross the page. And 4) for there to be either mismatch/null bytes before ++ the start of the strings. 
*/ ++ ++ size_t size = 10; ++ size_t addr_mask = (getpagesize () - 1) ^ (sizeof (CHAR) - 1); ++ CHAR *s1 = (CHAR *)(buf1 + (addr_mask & 0xffa)); ++ CHAR *s2 = (CHAR *)(buf2 + (addr_mask & 0xfed)); ++ int exp_result; + ++ STRCPY (s1, L ("tst-tlsmod%")); ++ STRCPY (s2, L ("tst-tls-manydynamic73mod")); ++ exp_result = SIMPLE_STRNCMP (s1, s2, size); + FOR_EACH_IMPL (impl, 0) +- check_result (impl, s1, s2, SIZE_MAX, 0); +- +- free (s2); ++ check_result (impl, s1, s2, size, exp_result); + } + + int +diff --git a/sysdeps/x86_64/multiarch/strcmp-avx2.S b/sysdeps/x86_64/multiarch/strcmp-avx2.S +index cdded412a70bad10..f9bdc5ccd03aa1f9 100644 +--- a/sysdeps/x86_64/multiarch/strcmp-avx2.S ++++ b/sysdeps/x86_64/multiarch/strcmp-avx2.S +@@ -661,6 +661,7 @@ L(ret8): + # ifdef USE_AS_STRNCMP + .p2align 4,, 10 + L(return_page_cross_end_check): ++ andl %r10d, %ecx + tzcntl %ecx, %ecx + leal -VEC_SIZE(%rax, %rcx), %ecx + cmpl %ecx, %edx +diff --git a/sysdeps/x86_64/multiarch/strcmp-evex.S b/sysdeps/x86_64/multiarch/strcmp-evex.S +index ed56af8ecdad48b2..0dfa62bd149c02b4 100644 +--- a/sysdeps/x86_64/multiarch/strcmp-evex.S ++++ b/sysdeps/x86_64/multiarch/strcmp-evex.S +@@ -689,6 +689,7 @@ L(ret8): + # ifdef USE_AS_STRNCMP + .p2align 4,, 10 + L(return_page_cross_end_check): ++ andl %r10d, %ecx + tzcntl %ecx, %ecx + leal -VEC_SIZE(%rax, %rcx, SIZE_OF_CHAR), %ecx + # ifdef USE_AS_WCSCMP diff --git a/SOURCES/glibc-upstream-2.34-210.patch b/SOURCES/glibc-upstream-2.34-210.patch new file mode 100644 index 0000000..4898d45 --- /dev/null +++ b/SOURCES/glibc-upstream-2.34-210.patch @@ -0,0 +1,71 @@ +commit e123f08ad5ea4691bc37430ce536988c221332d6 +Author: Noah Goldstein +Date: Thu Mar 24 15:50:33 2022 -0500 + + x86: Fix fallback for wcsncmp_avx2 in strcmp-avx2.S [BZ #28896] + + Overflow case for __wcsncmp_avx2_rtm should be __wcscmp_avx2_rtm not + __wcscmp_avx2. + + commit ddf0992cf57a93200e0c782e2a94d0733a5a0b87 + Author: Noah Goldstein + Date: Sun Jan 9 16:02:21 2022 -0600 + + x86: Fix __wcsncmp_avx2 in strcmp-avx2.S [BZ# 28755] + + Set the wrong fallback function for `__wcsncmp_avx2_rtm`. It was set + to fallback on to `__wcscmp_avx2` instead of `__wcscmp_avx2_rtm` which + can cause spurious aborts. + + This change will need to be backported. + + All string/memory tests pass. + Reviewed-by: H.J. 
Lu + + (cherry picked from commit 9fef7039a7d04947bc89296ee0d187bc8d89b772) + +diff --git a/sysdeps/x86/tst-strncmp-rtm.c b/sysdeps/x86/tst-strncmp-rtm.c +index aef9866cf2fbe774..ba6543be8ce13927 100644 +--- a/sysdeps/x86/tst-strncmp-rtm.c ++++ b/sysdeps/x86/tst-strncmp-rtm.c +@@ -70,6 +70,16 @@ function_overflow (void) + return 1; + } + ++__attribute__ ((noinline, noclone)) ++static int ++function_overflow2 (void) ++{ ++ if (STRNCMP (string1, string2, SIZE_MAX >> 4) == 0) ++ return 0; ++ else ++ return 1; ++} ++ + static int + do_test (void) + { +@@ -77,5 +87,10 @@ do_test (void) + if (status != EXIT_SUCCESS) + return status; + status = do_test_1 (TEST_NAME, LOOP, prepare, function_overflow); ++ if (status != EXIT_SUCCESS) ++ return status; ++ status = do_test_1 (TEST_NAME, LOOP, prepare, function_overflow2); ++ if (status != EXIT_SUCCESS) ++ return status; + return status; + } +diff --git a/sysdeps/x86_64/multiarch/strcmp-avx2.S b/sysdeps/x86_64/multiarch/strcmp-avx2.S +index f9bdc5ccd03aa1f9..09a73942086f9c9f 100644 +--- a/sysdeps/x86_64/multiarch/strcmp-avx2.S ++++ b/sysdeps/x86_64/multiarch/strcmp-avx2.S +@@ -122,7 +122,7 @@ ENTRY(STRCMP) + are cases where length is large enough that it can never be a + bound on valid memory so just use wcscmp. */ + shrq $56, %rcx +- jnz __wcscmp_avx2 ++ jnz OVERFLOW_STRCMP + + leaq (, %rdx, 4), %rdx + # endif diff --git a/SOURCES/glibc-upstream-2.34-211.patch b/SOURCES/glibc-upstream-2.34-211.patch new file mode 100644 index 0000000..1221458 --- /dev/null +++ b/SOURCES/glibc-upstream-2.34-211.patch @@ -0,0 +1,170 @@ +commit e4a2fb76efb45210c541ee3f8ef32f317783c3a8 +Author: Florian Weimer +Date: Wed May 11 20:30:49 2022 +0200 + + manual: Document the dlinfo function + + Reviewed-by: Carlos O'Donell + Tested-by: Carlos O'Donell + (cherry picked from commit 93804a1ee084d4bdc620b2b9f91615c7da0fabe1) + + Also includes partial backport of commit 5d28a8962dcb6ec056b81d730e + (the addition of manual/dynlink.texi). + +diff --git a/manual/Makefile b/manual/Makefile +index e83444341e282916..31678681ef059e0f 100644 +--- a/manual/Makefile ++++ b/manual/Makefile +@@ -39,7 +39,7 @@ chapters = $(addsuffix .texi, \ + pipe socket terminal syslog math arith time \ + resource setjmp signal startup process ipc job \ + nss users sysinfo conf crypt debug threads \ +- probes tunables) ++ dynlink probes tunables) + appendices = lang.texi header.texi install.texi maint.texi platform.texi \ + contrib.texi + licenses = freemanuals.texi lgpl-2.1.texi fdl-1.3.texi +diff --git a/manual/dynlink.texi b/manual/dynlink.texi +new file mode 100644 +index 0000000000000000..dbf3de11769d8e57 +--- /dev/null ++++ b/manual/dynlink.texi +@@ -0,0 +1,100 @@ ++@node Dynamic Linker ++@c @node Dynamic Linker, Internal Probes, Threads, Top ++@c %MENU% Loading programs and shared objects. ++@chapter Dynamic Linker ++@cindex dynamic linker ++@cindex dynamic loader ++ ++The @dfn{dynamic linker} is responsible for loading dynamically linked ++programs and their dependencies (in the form of shared objects). The ++dynamic linker in @theglibc{} also supports loading shared objects (such ++as plugins) later at run time. ++ ++Dynamic linkers are sometimes called @dfn{dynamic loaders}. ++ ++@menu ++* Dynamic Linker Introspection:: Interfaces for querying mapping information. ++@end menu ++ ++@node Dynamic Linker Introspection ++@section Dynamic Linker Introspection ++ ++@Theglibc{} provides various functions for querying information from the ++dynamic linker. 
++ ++@deftypefun {int} dlinfo (void *@var{handle}, int @var{request}, void *@var{arg}) ++@safety{@mtsafe{}@asunsafe{@asucorrupt{}}@acunsafe{@acucorrupt{}}} ++@standards{GNU, dlfcn.h} ++This function returns information about @var{handle} in the memory ++location @var{arg}, based on @var{request}. The @var{handle} argument ++must be a pointer returned by @code{dlopen} or @code{dlmopen}; it must ++not have been closed by @code{dlclose}. ++ ++On success, @code{dlinfo} returns 0. If there is an error, the function ++returns @math{-1}, and @code{dlerror} can be used to obtain a ++corresponding error message. ++ ++The following operations are defined for use with @var{request}: ++ ++@vtable @code ++@item RTLD_DI_LINKMAP ++The corresponding @code{struct link_map} pointer for @var{handle} is ++written to @code{*@var{arg}}. The @var{arg} argument must be the ++address of an object of type @code{struct link_map *}. ++ ++@item RTLD_DI_LMID ++The namespace identifier of @var{handle} is written to ++@code{*@var{arg}}. The @var{arg} argument must be the address of an ++object of type @code{Lmid_t}. ++ ++@item RTLD_DI_ORIGIN ++The value of the @code{$ORIGIN} dynamic string token for @var{handle} is ++written to the character array starting at @var{arg} as a ++null-terminated string. ++ ++This request type should not be used because it is prone to buffer ++overflows. ++ ++@item RTLD_DI_SERINFO ++@itemx RTLD_DI_SERINFOSIZE ++These requests can be used to obtain search path information for ++@var{handle}. For both requests, @var{arg} must point to a ++@code{Dl_serinfo} object. The @code{RTLD_DI_SERINFOSIZE} request must ++be made first; it updates the @code{dls_size} and @code{dls_cnt} members ++of the @code{Dl_serinfo} object. The caller should then allocate memory ++to store at least @code{dls_size} bytes and pass that buffer to a ++@code{RTLD_DI_SERINFO} request. This second request fills the ++@code{dls_serpath} array. The number of array elements was returned in ++the @code{dls_cnt} member in the initial @code{RTLD_DI_SERINFOSIZE} ++request. The caller is responsible for freeing the allocated buffer. ++ ++This interface is prone to buffer overflows in multi-threaded processes ++because the required size can change between the ++@code{RTLD_DI_SERINFOSIZE} and @code{RTLD_DI_SERINFO} requests. ++ ++@item RTLD_DI_TLS_DATA ++This request writes the address of the TLS block (in the current thread) ++for the shared object identified by @var{handle} to @code{*@var{arg}}. ++The argument @var{arg} must be the address of an object of type ++@code{void *}. A null pointer is written if the object does not have ++any associated TLS block. ++ ++@item RTLD_DI_TLS_MODID ++This request writes the TLS module ID for the shared object @var{handle} ++to @code{*@var{arg}}. The argument @var{arg} must be the address of an ++object of type @code{size_t}. The module ID is zero if the object ++does not have an associated TLS block. ++@end vtable ++ ++The @code{dlinfo} function is a GNU extension. 
++@end deftypefun ++ ++@c FIXME these are undocumented: ++@c dladdr ++@c dladdr1 ++@c dlclose ++@c dlerror ++@c dlmopen ++@c dlopen ++@c dlsym ++@c dlvsym +diff --git a/manual/libdl.texi b/manual/libdl.texi +deleted file mode 100644 +index e3fe0452d9f41d47..0000000000000000 +--- a/manual/libdl.texi ++++ /dev/null +@@ -1,10 +0,0 @@ +-@c FIXME these are undocumented: +-@c dladdr +-@c dladdr1 +-@c dlclose +-@c dlerror +-@c dlinfo +-@c dlmopen +-@c dlopen +-@c dlsym +-@c dlvsym +diff --git a/manual/probes.texi b/manual/probes.texi +index 4aae76b81921f347..ee019e651706f492 100644 +--- a/manual/probes.texi ++++ b/manual/probes.texi +@@ -1,5 +1,5 @@ + @node Internal Probes +-@c @node Internal Probes, Tunables, Threads, Top ++@c @node Internal Probes, Tunables, Dynamic Linker, Top + @c %MENU% Probes to monitor libc internal behavior + @chapter Internal probes + +diff --git a/manual/threads.texi b/manual/threads.texi +index 06b6b277a1228af1..7f166bfa87e88c36 100644 +--- a/manual/threads.texi ++++ b/manual/threads.texi +@@ -1,5 +1,5 @@ + @node Threads +-@c @node Threads, Internal Probes, Debugging Support, Top ++@c @node Threads, Dynamic Linker, Debugging Support, Top + @c %MENU% Functions, constants, and data types for working with threads + @chapter Threads + @cindex threads diff --git a/SOURCES/glibc-upstream-2.34-212.patch b/SOURCES/glibc-upstream-2.34-212.patch new file mode 100644 index 0000000..000023f --- /dev/null +++ b/SOURCES/glibc-upstream-2.34-212.patch @@ -0,0 +1,256 @@ +commit 91c2e6c3db44297bf4cb3a2e3c40236c5b6a0b23 +Author: Florian Weimer +Date: Fri Apr 29 17:00:53 2022 +0200 + + dlfcn: Implement the RTLD_DI_PHDR request type for dlinfo + + The information is theoretically available via dl_iterate_phdr as + well, but that approach is very slow if there are many shared + objects. + + Reviewed-by: Carlos O'Donell + Tested-by: Carlos O'Donell + (cherry picked from commit d056c212130280c0a54d9a4f72170ec621b70ce5) + +diff --git a/dlfcn/Makefile b/dlfcn/Makefile +index 6bbfbb8344da05cb..d3965427dabed898 100644 +--- a/dlfcn/Makefile ++++ b/dlfcn/Makefile +@@ -73,6 +73,10 @@ tststatic3-ENV = $(tststatic-ENV) + tststatic4-ENV = $(tststatic-ENV) + tststatic5-ENV = $(tststatic-ENV) + ++tests-internal += \ ++ tst-dlinfo-phdr \ ++ # tests-internal ++ + ifneq (,$(CXX)) + modules-names += bug-atexit3-lib + else +diff --git a/dlfcn/dlfcn.h b/dlfcn/dlfcn.h +index 4a3b870a487ea789..24388cfedae4dd67 100644 +--- a/dlfcn/dlfcn.h ++++ b/dlfcn/dlfcn.h +@@ -162,7 +162,12 @@ enum + segment, or if the calling thread has not allocated a block for it. */ + RTLD_DI_TLS_DATA = 10, + +- RTLD_DI_MAX = 10 ++ /* Treat ARG as const ElfW(Phdr) **, and store the address of the ++ program header array at that location. The dlinfo call returns ++ the number of program headers in the array. */ ++ RTLD_DI_PHDR = 11, ++ ++ RTLD_DI_MAX = 11 + }; + + +diff --git a/dlfcn/dlinfo.c b/dlfcn/dlinfo.c +index 47d2daa96fa5986f..1842925fb7c594dd 100644 +--- a/dlfcn/dlinfo.c ++++ b/dlfcn/dlinfo.c +@@ -28,6 +28,10 @@ struct dlinfo_args + void *handle; + int request; + void *arg; ++ ++ /* This is the value that is returned from dlinfo if no error is ++ signaled. 
*/ ++ int result; + }; + + static void +@@ -40,6 +44,7 @@ dlinfo_doit (void *argsblock) + { + case RTLD_DI_CONFIGADDR: + default: ++ args->result = -1; + _dl_signal_error (0, NULL, NULL, N_("unsupported dlinfo request")); + break; + +@@ -75,6 +80,11 @@ dlinfo_doit (void *argsblock) + *(void **) args->arg = data; + break; + } ++ ++ case RTLD_DI_PHDR: ++ *(const ElfW(Phdr) **) args->arg = l->l_phdr; ++ args->result = l->l_phnum; ++ break; + } + } + +@@ -82,7 +92,8 @@ static int + dlinfo_implementation (void *handle, int request, void *arg) + { + struct dlinfo_args args = { handle, request, arg }; +- return _dlerror_run (&dlinfo_doit, &args) ? -1 : 0; ++ _dlerror_run (&dlinfo_doit, &args); ++ return args.result; + } + + #ifdef SHARED +diff --git a/dlfcn/tst-dlinfo-phdr.c b/dlfcn/tst-dlinfo-phdr.c +new file mode 100644 +index 0000000000000000..a15a7d48ebd3b976 +--- /dev/null ++++ b/dlfcn/tst-dlinfo-phdr.c +@@ -0,0 +1,125 @@ ++/* Test for dlinfo (RTLD_DI_PHDR). ++ Copyright (C) 2022 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++ ++/* Used to verify that the program header array appears as expected ++ among the dl_iterate_phdr callback invocations. */ ++ ++struct dlip_callback_args ++{ ++ struct link_map *l; /* l->l_addr is used to find the object. */ ++ const ElfW(Phdr) *phdr; /* Expected program header pointed. */ ++ int phnum; /* Expected program header count. */ ++ bool found; /* True if l->l_addr has been found. */ ++}; ++ ++static int ++dlip_callback (struct dl_phdr_info *dlpi, size_t size, void *closure) ++{ ++ TEST_COMPARE (sizeof (*dlpi), size); ++ struct dlip_callback_args *args = closure; ++ ++ if (dlpi->dlpi_addr == args->l->l_addr) ++ { ++ TEST_VERIFY (!args->found); ++ args->found = true; ++ TEST_VERIFY (args->phdr == dlpi->dlpi_phdr); ++ TEST_COMPARE (args->phnum, dlpi->dlpi_phnum); ++ } ++ ++ return 0; ++} ++ ++static int ++do_test (void) ++{ ++ /* Avoid a copy relocation. */ ++ struct r_debug *debug = xdlsym (RTLD_DEFAULT, "_r_debug"); ++ struct link_map *l = (struct link_map *) debug->r_map; ++ TEST_VERIFY_EXIT (l != NULL); ++ ++ do ++ { ++ printf ("info: checking link map %p (%p) for \"%s\"\n", ++ l, l->l_phdr, l->l_name); ++ ++ /* Cause dlerror () to return an error message. */ ++ dlsym (RTLD_DEFAULT, "does-not-exist"); ++ ++ /* Use the extension that link maps are valid dlopen handles. */ ++ const ElfW(Phdr) *phdr; ++ int phnum = dlinfo (l, RTLD_DI_PHDR, &phdr); ++ TEST_VERIFY (phnum >= 0); ++ /* Verify that the error message has been cleared. */ ++ TEST_COMPARE_STRING (dlerror (), NULL); ++ ++ TEST_VERIFY (phdr == l->l_phdr); ++ TEST_COMPARE (phnum, l->l_phnum); ++ ++ /* Check that we can find PT_DYNAMIC among the array. 
*/ ++ { ++ bool dynamic_found = false; ++ for (int i = 0; i < phnum; ++i) ++ if (phdr[i].p_type == PT_DYNAMIC) ++ { ++ dynamic_found = true; ++ TEST_COMPARE ((ElfW(Addr)) l->l_ld, l->l_addr + phdr[i].p_vaddr); ++ } ++ TEST_VERIFY (dynamic_found); ++ } ++ ++ /* Check that dl_iterate_phdr finds the link map with the same ++ program headers. */ ++ { ++ struct dlip_callback_args args = ++ { ++ .l = l, ++ .phdr = phdr, ++ .phnum = phnum, ++ .found = false, ++ }; ++ TEST_COMPARE (dl_iterate_phdr (dlip_callback, &args), 0); ++ TEST_VERIFY (args.found); ++ } ++ ++ if (l->l_prev == NULL) ++ { ++ /* This is the executable, so the information is also ++ available via getauxval. */ ++ TEST_COMPARE_STRING (l->l_name, ""); ++ TEST_VERIFY (phdr == (const ElfW(Phdr) *) getauxval (AT_PHDR)); ++ TEST_COMPARE (phnum, getauxval (AT_PHNUM)); ++ } ++ ++ l = l->l_next; ++ } ++ while (l != NULL); ++ ++ return 0; ++} ++ ++#include +diff --git a/manual/dynlink.texi b/manual/dynlink.texi +index dbf3de11769d8e57..7dcac64889e389fd 100644 +--- a/manual/dynlink.texi ++++ b/manual/dynlink.texi +@@ -30,9 +30,9 @@ location @var{arg}, based on @var{request}. The @var{handle} argument + must be a pointer returned by @code{dlopen} or @code{dlmopen}; it must + not have been closed by @code{dlclose}. + +-On success, @code{dlinfo} returns 0. If there is an error, the function +-returns @math{-1}, and @code{dlerror} can be used to obtain a +-corresponding error message. ++On success, @code{dlinfo} returns 0 for most request types; exceptions ++are noted below. If there is an error, the function returns @math{-1}, ++and @code{dlerror} can be used to obtain a corresponding error message. + + The following operations are defined for use with @var{request}: + +@@ -84,6 +84,15 @@ This request writes the TLS module ID for the shared object @var{handle} + to @code{*@var{arg}}. The argument @var{arg} must be the address of an + object of type @code{size_t}. The module ID is zero if the object + does not have an associated TLS block. ++ ++@item RTLD_DI_PHDR ++This request writes the address of the program header array to ++@code{*@var{arg}}. The argument @var{arg} must be the address of an ++object of type @code{const ElfW(Phdr) *} (that is, ++@code{const Elf32_Phdr *} or @code{const Elf64_Phdr *}, as appropriate ++for the current architecture). For this request, the value returned by ++@code{dlinfo} is the number of program headers in the program header ++array. + @end vtable + + The @code{dlinfo} function is a GNU extension. diff --git a/SOURCES/glibc-upstream-2.34-213.patch b/SOURCES/glibc-upstream-2.34-213.patch new file mode 100644 index 0000000..544f599 --- /dev/null +++ b/SOURCES/glibc-upstream-2.34-213.patch @@ -0,0 +1,31 @@ +commit b72bbba23687ed67887d1d18c51cce5cc9c575ca +Author: Siddhesh Poyarekar +Date: Fri May 13 10:01:47 2022 +0530 + + fortify: Ensure that __glibc_fortify condition is a constant [BZ #29141] + + The fix c8ee1c85 introduced a -1 check for object size without also + checking that object size is a constant. Because of this, the tree + optimizer passes in gcc fail to fold away one of the branches in + __glibc_fortify and trips on a spurious Wstringop-overflow. The warning + itself is incorrect and the branch does go away eventually in DCE in the + rtl passes in gcc, but the constant check is a helpful hint to simplify + code early, so add it in. 
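[An illustrative aside, not part of the patch: the effect of the guard
can be sketched with a simplified stand-in for the real macro.
SAFE_OR_UNKNOWN and copy_n below are invented names for the example.]

    #include <string.h>

    /* Simplified stand-in for __glibc_safe_or_unknown_len: a length N
       is safe when the object size OSZ is statically unknown (-1), or
       when N does not exceed OSZ.  The __builtin_constant_p guard lets
       GCC fold the whole condition to 1 in its early tree passes
       whenever OSZ folds to the constant -1.  */
    #define SAFE_OR_UNKNOWN(n, osz) \
      ((__builtin_constant_p (osz) && (osz) == (size_t) -1) || (n) <= (osz))

    char *
    copy_n (char *dst, const char *src, size_t n)
    {
      /* For a DST of unknown size, __builtin_object_size folds to
         (size_t) -1, so the branch below is discarded early instead of
         surviving long enough to draw a spurious -Wstringop-overflow.  */
      if (SAFE_OR_UNKNOWN (n, __builtin_object_size (dst, 0)))
        return memcpy (dst, src, n);
      return dst;
    }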
+ + Resolves: BZ #29141 + Signed-off-by: Siddhesh Poyarekar + (cherry picked from commit 61a87530108ec9181e1b18a9b727ec3cc3ba7532) + +diff --git a/misc/sys/cdefs.h b/misc/sys/cdefs.h +index b36013b9a6b4d9c3..e0ecd9147ee3ce48 100644 +--- a/misc/sys/cdefs.h ++++ b/misc/sys/cdefs.h +@@ -163,7 +163,7 @@ + /* Length is known to be safe at compile time if the __L * __S <= __OBJSZ + condition can be folded to a constant and if it is true, or unknown (-1) */ + #define __glibc_safe_or_unknown_len(__l, __s, __osz) \ +- ((__osz) == (__SIZE_TYPE__) -1 \ ++ ((__builtin_constant_p (__osz) && (__osz) == (__SIZE_TYPE__) -1) \ + || (__glibc_unsigned_or_positive (__l) \ + && __builtin_constant_p (__glibc_safe_len_cond ((__SIZE_TYPE__) (__l), \ + (__s), (__osz))) \ diff --git a/SOURCES/glibc-upstream-2.34-214.patch b/SOURCES/glibc-upstream-2.34-214.patch new file mode 100644 index 0000000..d51a006 --- /dev/null +++ b/SOURCES/glibc-upstream-2.34-214.patch @@ -0,0 +1,22 @@ +commit 8de6e4a199ba6cc8aaeb43924b974eed67164bd6 +Author: H.J. Lu +Date: Sat Feb 5 11:06:01 2022 -0800 + + x86: Improve L to support L(XXX_SYMBOL (YYY, ZZZ)) + + (cherry picked from commit 1283948f236f209b7d3f44b69a42b96806fa6da0) + +diff --git a/sysdeps/x86/sysdep.h b/sysdeps/x86/sysdep.h +index 937180c1bd791570..deda1c4e492f6176 100644 +--- a/sysdeps/x86/sysdep.h ++++ b/sysdeps/x86/sysdep.h +@@ -111,7 +111,8 @@ enum cf_protection_level + /* Local label name for asm code. */ + #ifndef L + /* ELF-like local names start with `.L'. */ +-# define L(name) .L##name ++# define LOCAL_LABEL(name) .L##name ++# define L(name) LOCAL_LABEL(name) + #endif + + #define atom_text_section .section ".text.atom", "ax" diff --git a/SOURCES/glibc-upstream-2.34-215.patch b/SOURCES/glibc-upstream-2.34-215.patch new file mode 100644 index 0000000..d33cace --- /dev/null +++ b/SOURCES/glibc-upstream-2.34-215.patch @@ -0,0 +1,98 @@ +commit 6cba46c85804988f4fd41ef03e8a170a4c987a86 +Author: H.J. 
Lu +Date: Sat Feb 5 11:52:33 2022 -0800 + + x86_64/multiarch: Sort sysdep_routines and put one entry per line + + (cherry picked from commit c328d0152d4b14cca58407ec68143894c8863004) + +diff --git a/sysdeps/x86_64/multiarch/Makefile b/sysdeps/x86_64/multiarch/Makefile +index 37d8d6f0bd2d10cc..8c9e7812c6af10b8 100644 +--- a/sysdeps/x86_64/multiarch/Makefile ++++ b/sysdeps/x86_64/multiarch/Makefile +@@ -132,37 +132,55 @@ CFLAGS-strspn-c.c += -msse4 + endif + + ifeq ($(subdir),wcsmbs) +-sysdep_routines += wmemcmp-sse4 wmemcmp-ssse3 wmemcmp-c \ +- wmemcmp-avx2-movbe \ +- wmemchr-sse2 wmemchr-avx2 \ +- wcscmp-sse2 wcscmp-avx2 \ +- wcsncmp-sse2 wcsncmp-avx2 \ +- wcscpy-ssse3 wcscpy-c \ +- wcschr-sse2 wcschr-avx2 \ +- wcsrchr-sse2 wcsrchr-avx2 \ +- wcslen-sse2 wcslen-sse4_1 wcslen-avx2 \ +- wcsnlen-c wcsnlen-sse4_1 wcsnlen-avx2 \ +- wcschr-avx2-rtm \ +- wcscmp-avx2-rtm \ +- wcslen-avx2-rtm \ +- wcsncmp-avx2-rtm \ +- wcsnlen-avx2-rtm \ +- wcsrchr-avx2-rtm \ +- wmemchr-avx2-rtm \ +- wmemcmp-avx2-movbe-rtm \ +- wcschr-evex \ +- wcscmp-evex \ +- wcslen-evex \ +- wcsncmp-evex \ +- wcsnlen-evex \ +- wcsrchr-evex \ +- wmemchr-evex \ +- wmemcmp-evex-movbe \ +- wmemchr-evex-rtm ++sysdep_routines += \ ++ wcschr-avx2 \ ++ wcschr-avx2-rtm \ ++ wcschr-evex \ ++ wcschr-sse2 \ ++ wcscmp-avx2 \ ++ wcscmp-avx2-rtm \ ++ wcscmp-evex \ ++ wcscmp-sse2 \ ++ wcscpy-c \ ++ wcscpy-ssse3 \ ++ wcslen-avx2 \ ++ wcslen-avx2-rtm \ ++ wcslen-evex \ ++ wcslen-sse2 \ ++ wcslen-sse4_1 \ ++ wcsncmp-avx2 \ ++ wcsncmp-avx2-rtm \ ++ wcsncmp-evex \ ++ wcsncmp-sse2 \ ++ wcsnlen-avx2 \ ++ wcsnlen-avx2-rtm \ ++ wcsnlen-c \ ++ wcsnlen-evex \ ++ wcsnlen-sse4_1 \ ++ wcsrchr-avx2 \ ++ wcsrchr-avx2-rtm \ ++ wcsrchr-evex \ ++ wcsrchr-sse2 \ ++ wmemchr-avx2 \ ++ wmemchr-avx2-rtm \ ++ wmemchr-evex \ ++ wmemchr-evex-rtm \ ++ wmemchr-sse2 \ ++ wmemcmp-avx2-movbe \ ++ wmemcmp-avx2-movbe-rtm \ ++ wmemcmp-c \ ++ wmemcmp-evex-movbe \ ++ wmemcmp-sse4 \ ++ wmemcmp-ssse3 \ +# sysdep_routines + endif + + ifeq ($(subdir),debug) +-sysdep_routines += memcpy_chk-nonshared mempcpy_chk-nonshared \ +- memmove_chk-nonshared memset_chk-nonshared \ +- wmemset_chk-nonshared ++sysdep_routines += \ ++ memcpy_chk-nonshared \ ++ memmove_chk-nonshared \ ++ mempcpy_chk-nonshared \ ++ memset_chk-nonshared \ ++ wmemset_chk-nonshared \ ++# sysdep_routines + endif diff --git a/SOURCES/glibc-upstream-2.34-216.patch b/SOURCES/glibc-upstream-2.34-216.patch new file mode 100644 index 0000000..b1e36ab --- /dev/null +++ b/SOURCES/glibc-upstream-2.34-216.patch @@ -0,0 +1,32 @@ +commit 37f373e33496ea437cc7e375cc835c20d4b35fb2 +Author: H.J. Lu +Date: Thu Feb 10 11:52:50 2022 -0800 + + x86-64: Remove bzero weak alias in SSE2 memset + + commit 3d9f171bfb5325bd5f427e9fc386453358c6e840 + Author: H.J. Lu + Date: Mon Feb 7 05:55:15 2022 -0800 + + x86-64: Optimize bzero + + added the optimized bzero. Remove bzero weak alias in SSE2 memset to + avoid undefined __bzero in memset-sse2-unaligned-erms. 
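[For context, an editor's sketch rather than text from the commit:
glibc's weak_alias machinery, roughly as defined in
include/libc-symbols.h, expands to a GCC alias attribute, which is why
redefining the macro to expand to nothing suppresses the alias.]

    /* Roughly what weak_alias (name, aliasname) expands to:  */
    #define weak_alias(name, aliasname) \
      extern __typeof (name) aliasname __attribute__ ((weak, alias (#name)));

    /* A multiarch file that must not emit the alias redefines the
       macro to nothing before the generic implementation invokes it:  */
    #undef weak_alias
    #define weak_alias(name, aliasname)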
+ + (cherry picked from commit 0fb8800029d230b3711bf722b2a47db92d0e273f) + +diff --git a/sysdeps/x86_64/multiarch/memset-sse2-unaligned-erms.S b/sysdeps/x86_64/multiarch/memset-sse2-unaligned-erms.S +index bac74ac37fd3c144..2951f7f5f70e274a 100644 +--- a/sysdeps/x86_64/multiarch/memset-sse2-unaligned-erms.S ++++ b/sysdeps/x86_64/multiarch/memset-sse2-unaligned-erms.S +@@ -31,9 +31,7 @@ + # endif + + # undef weak_alias +-# define weak_alias(original, alias) \ +- .weak bzero; bzero = __bzero +- ++# define weak_alias(original, alias) + # undef strong_alias + # define strong_alias(ignored1, ignored2) + #endif diff --git a/SOURCES/glibc-upstream-2.34-217.patch b/SOURCES/glibc-upstream-2.34-217.patch new file mode 100644 index 0000000..8f92420 --- /dev/null +++ b/SOURCES/glibc-upstream-2.34-217.patch @@ -0,0 +1,24 @@ +commit dd457606ca4583b4a5e83d4e8956e6f9db61df6d +Author: Adhemerval Zanella +Date: Thu Feb 10 11:23:24 2022 -0300 + + x86_64: Remove bcopy optimizations + + The symbol is not present in the current POSIX specification, and the + compiler already generates a memmove call. + + (cherry picked from commit bf92893a14ebc161b08b28acc24fa06ae6be19cb) + +diff --git a/sysdeps/x86_64/multiarch/bcopy.S b/sysdeps/x86_64/multiarch/bcopy.S +deleted file mode 100644 +index 639f02bde3ac3ed1..0000000000000000 +--- a/sysdeps/x86_64/multiarch/bcopy.S ++++ /dev/null +@@ -1,7 +0,0 @@ +-#include +- +- .text +-ENTRY(bcopy) +- xchg %rdi, %rsi +- jmp __libc_memmove /* Branch to IFUNC memmove. */ +-END(bcopy) diff --git a/SOURCES/glibc-upstream-2.34-218.patch b/SOURCES/glibc-upstream-2.34-218.patch new file mode 100644 index 0000000..312016b --- /dev/null +++ b/SOURCES/glibc-upstream-2.34-218.patch @@ -0,0 +1,367 @@ +commit 3c55c207564c0ae30d78d01689b4ae16bf38dd63 +Author: Noah Goldstein +Date: Wed Mar 23 16:57:16 2022 -0500 + + x86: Code cleanup in strchr-avx2 and comment justifying branch + + Small code cleanup for size: -53 bytes. + + Add comment justifying using a branch to do NULL/non-null return. + + All string/memory tests pass and no regressions in benchtests. + + geometric_mean(N=20) of all benchmarks Original / New: 1.00 + Reviewed-by: H.J. Lu + + (cherry picked from commit a6fbf4d51e9ba8063c4f8331564892ead9c67344) + +diff --git a/sysdeps/x86_64/multiarch/strchr-avx2.S b/sysdeps/x86_64/multiarch/strchr-avx2.S +index 413942b96a835c4a..ef4ce0f3677e30c8 100644 +--- a/sysdeps/x86_64/multiarch/strchr-avx2.S ++++ b/sysdeps/x86_64/multiarch/strchr-avx2.S +@@ -48,13 +48,13 @@ + # define PAGE_SIZE 4096 + + .section SECTION(.text),"ax",@progbits +-ENTRY (STRCHR) ++ENTRY_P2ALIGN (STRCHR, 5) + /* Broadcast CHAR to YMM0. */ + vmovd %esi, %xmm0 + movl %edi, %eax + andl $(PAGE_SIZE - 1), %eax + VPBROADCAST %xmm0, %ymm0 +- vpxor %xmm9, %xmm9, %xmm9 ++ vpxor %xmm1, %xmm1, %xmm1 + + /* Check if we cross page boundary with one vector load. */ + cmpl $(PAGE_SIZE - VEC_SIZE), %eax +@@ -62,37 +62,29 @@ ENTRY (STRCHR) + + /* Check the first VEC_SIZE bytes. Search for both CHAR and the + null byte. */ +- vmovdqu (%rdi), %ymm8 +- VPCMPEQ %ymm8, %ymm0, %ymm1 +- VPCMPEQ %ymm8, %ymm9, %ymm2 +- vpor %ymm1, %ymm2, %ymm1 +- vpmovmskb %ymm1, %eax ++ vmovdqu (%rdi), %ymm2 ++ VPCMPEQ %ymm2, %ymm0, %ymm3 ++ VPCMPEQ %ymm2, %ymm1, %ymm2 ++ vpor %ymm3, %ymm2, %ymm3 ++ vpmovmskb %ymm3, %eax + testl %eax, %eax + jz L(aligned_more) + tzcntl %eax, %eax + # ifndef USE_AS_STRCHRNUL +- /* Found CHAR or the null byte. 
*/ +- cmp (%rdi, %rax), %CHAR_REG +- jne L(zero) +-# endif +- addq %rdi, %rax +- VZEROUPPER_RETURN +- +- /* .p2align 5 helps keep performance more consistent if ENTRY() +- alignment % 32 was either 16 or 0. As well this makes the +- alignment % 32 of the loop_4x_vec fixed which makes tuning it +- easier. */ +- .p2align 5 +-L(first_vec_x4): +- tzcntl %eax, %eax +- addq $(VEC_SIZE * 3 + 1), %rdi +-# ifndef USE_AS_STRCHRNUL +- /* Found CHAR or the null byte. */ ++ /* Found CHAR or the null byte. */ + cmp (%rdi, %rax), %CHAR_REG ++ /* NB: Use a branch instead of cmovcc here. The expectation is ++ that with strchr the user will branch based on input being ++ null. Since this branch will be 100% predictive of the user ++ branch a branch miss here should save what otherwise would ++ be branch miss in the user code. Otherwise using a branch 1) ++ saves code size and 2) is faster in highly predictable ++ environments. */ + jne L(zero) + # endif + addq %rdi, %rax +- VZEROUPPER_RETURN ++L(return_vzeroupper): ++ ZERO_UPPER_VEC_REGISTERS_RETURN + + # ifndef USE_AS_STRCHRNUL + L(zero): +@@ -103,7 +95,8 @@ L(zero): + + .p2align 4 + L(first_vec_x1): +- tzcntl %eax, %eax ++ /* Use bsf to save code size. */ ++ bsfl %eax, %eax + incq %rdi + # ifndef USE_AS_STRCHRNUL + /* Found CHAR or the null byte. */ +@@ -113,9 +106,10 @@ L(first_vec_x1): + addq %rdi, %rax + VZEROUPPER_RETURN + +- .p2align 4 ++ .p2align 4,, 10 + L(first_vec_x2): +- tzcntl %eax, %eax ++ /* Use bsf to save code size. */ ++ bsfl %eax, %eax + addq $(VEC_SIZE + 1), %rdi + # ifndef USE_AS_STRCHRNUL + /* Found CHAR or the null byte. */ +@@ -125,9 +119,10 @@ L(first_vec_x2): + addq %rdi, %rax + VZEROUPPER_RETURN + +- .p2align 4 ++ .p2align 4,, 8 + L(first_vec_x3): +- tzcntl %eax, %eax ++ /* Use bsf to save code size. */ ++ bsfl %eax, %eax + addq $(VEC_SIZE * 2 + 1), %rdi + # ifndef USE_AS_STRCHRNUL + /* Found CHAR or the null byte. */ +@@ -137,6 +132,21 @@ L(first_vec_x3): + addq %rdi, %rax + VZEROUPPER_RETURN + ++ .p2align 4,, 10 ++L(first_vec_x4): ++ /* Use bsf to save code size. */ ++ bsfl %eax, %eax ++ addq $(VEC_SIZE * 3 + 1), %rdi ++# ifndef USE_AS_STRCHRNUL ++ /* Found CHAR or the null byte. */ ++ cmp (%rdi, %rax), %CHAR_REG ++ jne L(zero) ++# endif ++ addq %rdi, %rax ++ VZEROUPPER_RETURN ++ ++ ++ + .p2align 4 + L(aligned_more): + /* Align data to VEC_SIZE - 1. This is the same number of +@@ -146,90 +156,92 @@ L(aligned_more): + L(cross_page_continue): + /* Check the next 4 * VEC_SIZE. Only one VEC_SIZE at a time + since data is only aligned to VEC_SIZE. 
*/ +- vmovdqa 1(%rdi), %ymm8 +- VPCMPEQ %ymm8, %ymm0, %ymm1 +- VPCMPEQ %ymm8, %ymm9, %ymm2 +- vpor %ymm1, %ymm2, %ymm1 +- vpmovmskb %ymm1, %eax ++ vmovdqa 1(%rdi), %ymm2 ++ VPCMPEQ %ymm2, %ymm0, %ymm3 ++ VPCMPEQ %ymm2, %ymm1, %ymm2 ++ vpor %ymm3, %ymm2, %ymm3 ++ vpmovmskb %ymm3, %eax + testl %eax, %eax + jnz L(first_vec_x1) + +- vmovdqa (VEC_SIZE + 1)(%rdi), %ymm8 +- VPCMPEQ %ymm8, %ymm0, %ymm1 +- VPCMPEQ %ymm8, %ymm9, %ymm2 +- vpor %ymm1, %ymm2, %ymm1 +- vpmovmskb %ymm1, %eax ++ vmovdqa (VEC_SIZE + 1)(%rdi), %ymm2 ++ VPCMPEQ %ymm2, %ymm0, %ymm3 ++ VPCMPEQ %ymm2, %ymm1, %ymm2 ++ vpor %ymm3, %ymm2, %ymm3 ++ vpmovmskb %ymm3, %eax + testl %eax, %eax + jnz L(first_vec_x2) + +- vmovdqa (VEC_SIZE * 2 + 1)(%rdi), %ymm8 +- VPCMPEQ %ymm8, %ymm0, %ymm1 +- VPCMPEQ %ymm8, %ymm9, %ymm2 +- vpor %ymm1, %ymm2, %ymm1 +- vpmovmskb %ymm1, %eax ++ vmovdqa (VEC_SIZE * 2 + 1)(%rdi), %ymm2 ++ VPCMPEQ %ymm2, %ymm0, %ymm3 ++ VPCMPEQ %ymm2, %ymm1, %ymm2 ++ vpor %ymm3, %ymm2, %ymm3 ++ vpmovmskb %ymm3, %eax + testl %eax, %eax + jnz L(first_vec_x3) + +- vmovdqa (VEC_SIZE * 3 + 1)(%rdi), %ymm8 +- VPCMPEQ %ymm8, %ymm0, %ymm1 +- VPCMPEQ %ymm8, %ymm9, %ymm2 +- vpor %ymm1, %ymm2, %ymm1 +- vpmovmskb %ymm1, %eax ++ vmovdqa (VEC_SIZE * 3 + 1)(%rdi), %ymm2 ++ VPCMPEQ %ymm2, %ymm0, %ymm3 ++ VPCMPEQ %ymm2, %ymm1, %ymm2 ++ vpor %ymm3, %ymm2, %ymm3 ++ vpmovmskb %ymm3, %eax + testl %eax, %eax + jnz L(first_vec_x4) +- /* Align data to VEC_SIZE * 4 - 1. */ +- addq $(VEC_SIZE * 4 + 1), %rdi +- andq $-(VEC_SIZE * 4), %rdi ++ /* Align data to VEC_SIZE * 4 - 1. */ ++ incq %rdi ++ orq $(VEC_SIZE * 4 - 1), %rdi + .p2align 4 + L(loop_4x_vec): + /* Compare 4 * VEC at a time forward. */ +- vmovdqa (%rdi), %ymm5 +- vmovdqa (VEC_SIZE)(%rdi), %ymm6 +- vmovdqa (VEC_SIZE * 2)(%rdi), %ymm7 +- vmovdqa (VEC_SIZE * 3)(%rdi), %ymm8 ++ vmovdqa 1(%rdi), %ymm6 ++ vmovdqa (VEC_SIZE + 1)(%rdi), %ymm7 + + /* Leaves only CHARS matching esi as 0. */ +- vpxor %ymm5, %ymm0, %ymm1 + vpxor %ymm6, %ymm0, %ymm2 + vpxor %ymm7, %ymm0, %ymm3 +- vpxor %ymm8, %ymm0, %ymm4 + +- VPMINU %ymm1, %ymm5, %ymm1 + VPMINU %ymm2, %ymm6, %ymm2 + VPMINU %ymm3, %ymm7, %ymm3 +- VPMINU %ymm4, %ymm8, %ymm4 + +- VPMINU %ymm1, %ymm2, %ymm5 +- VPMINU %ymm3, %ymm4, %ymm6 ++ vmovdqa (VEC_SIZE * 2 + 1)(%rdi), %ymm6 ++ vmovdqa (VEC_SIZE * 3 + 1)(%rdi), %ymm7 ++ ++ vpxor %ymm6, %ymm0, %ymm4 ++ vpxor %ymm7, %ymm0, %ymm5 ++ ++ VPMINU %ymm4, %ymm6, %ymm4 ++ VPMINU %ymm5, %ymm7, %ymm5 + +- VPMINU %ymm5, %ymm6, %ymm6 ++ VPMINU %ymm2, %ymm3, %ymm6 ++ VPMINU %ymm4, %ymm5, %ymm7 + +- VPCMPEQ %ymm6, %ymm9, %ymm6 +- vpmovmskb %ymm6, %ecx ++ VPMINU %ymm6, %ymm7, %ymm7 ++ ++ VPCMPEQ %ymm7, %ymm1, %ymm7 ++ vpmovmskb %ymm7, %ecx + subq $-(VEC_SIZE * 4), %rdi + testl %ecx, %ecx + jz L(loop_4x_vec) + +- +- VPCMPEQ %ymm1, %ymm9, %ymm1 +- vpmovmskb %ymm1, %eax ++ VPCMPEQ %ymm2, %ymm1, %ymm2 ++ vpmovmskb %ymm2, %eax + testl %eax, %eax + jnz L(last_vec_x0) + + +- VPCMPEQ %ymm5, %ymm9, %ymm2 +- vpmovmskb %ymm2, %eax ++ VPCMPEQ %ymm3, %ymm1, %ymm3 ++ vpmovmskb %ymm3, %eax + testl %eax, %eax + jnz L(last_vec_x1) + +- VPCMPEQ %ymm3, %ymm9, %ymm3 +- vpmovmskb %ymm3, %eax ++ VPCMPEQ %ymm4, %ymm1, %ymm4 ++ vpmovmskb %ymm4, %eax + /* rcx has combined result from all 4 VEC. It will only be used + if the first 3 other VEC all did not contain a match. */ + salq $32, %rcx + orq %rcx, %rax + tzcntq %rax, %rax +- subq $(VEC_SIZE * 2), %rdi ++ subq $(VEC_SIZE * 2 - 1), %rdi + # ifndef USE_AS_STRCHRNUL + /* Found CHAR or the null byte. 
*/ + cmp (%rdi, %rax), %CHAR_REG +@@ -239,10 +251,11 @@ L(loop_4x_vec): + VZEROUPPER_RETURN + + +- .p2align 4 ++ .p2align 4,, 10 + L(last_vec_x0): +- tzcntl %eax, %eax +- addq $-(VEC_SIZE * 4), %rdi ++ /* Use bsf to save code size. */ ++ bsfl %eax, %eax ++ addq $-(VEC_SIZE * 4 - 1), %rdi + # ifndef USE_AS_STRCHRNUL + /* Found CHAR or the null byte. */ + cmp (%rdi, %rax), %CHAR_REG +@@ -251,16 +264,11 @@ L(last_vec_x0): + addq %rdi, %rax + VZEROUPPER_RETURN + +-# ifndef USE_AS_STRCHRNUL +-L(zero_end): +- xorl %eax, %eax +- VZEROUPPER_RETURN +-# endif + +- .p2align 4 ++ .p2align 4,, 10 + L(last_vec_x1): + tzcntl %eax, %eax +- subq $(VEC_SIZE * 3), %rdi ++ subq $(VEC_SIZE * 3 - 1), %rdi + # ifndef USE_AS_STRCHRNUL + /* Found CHAR or the null byte. */ + cmp (%rdi, %rax), %CHAR_REG +@@ -269,18 +277,23 @@ L(last_vec_x1): + addq %rdi, %rax + VZEROUPPER_RETURN + ++# ifndef USE_AS_STRCHRNUL ++L(zero_end): ++ xorl %eax, %eax ++ VZEROUPPER_RETURN ++# endif + + /* Cold case for crossing page with first load. */ +- .p2align 4 ++ .p2align 4,, 8 + L(cross_page_boundary): + movq %rdi, %rdx + /* Align rdi to VEC_SIZE - 1. */ + orq $(VEC_SIZE - 1), %rdi +- vmovdqa -(VEC_SIZE - 1)(%rdi), %ymm8 +- VPCMPEQ %ymm8, %ymm0, %ymm1 +- VPCMPEQ %ymm8, %ymm9, %ymm2 +- vpor %ymm1, %ymm2, %ymm1 +- vpmovmskb %ymm1, %eax ++ vmovdqa -(VEC_SIZE - 1)(%rdi), %ymm2 ++ VPCMPEQ %ymm2, %ymm0, %ymm3 ++ VPCMPEQ %ymm2, %ymm1, %ymm2 ++ vpor %ymm3, %ymm2, %ymm3 ++ vpmovmskb %ymm3, %eax + /* Remove the leading bytes. sarxl only uses bits [5:0] of COUNT + so no need to manually mod edx. */ + sarxl %edx, %eax, %eax +@@ -291,13 +304,10 @@ L(cross_page_boundary): + xorl %ecx, %ecx + /* Found CHAR or the null byte. */ + cmp (%rdx, %rax), %CHAR_REG +- leaq (%rdx, %rax), %rax +- cmovne %rcx, %rax +-# else +- addq %rdx, %rax ++ jne L(zero_end) + # endif +-L(return_vzeroupper): +- ZERO_UPPER_VEC_REGISTERS_RETURN ++ addq %rdx, %rax ++ VZEROUPPER_RETURN + + END (STRCHR) +-# endif ++#endif diff --git a/SOURCES/glibc-upstream-2.34-219.patch b/SOURCES/glibc-upstream-2.34-219.patch new file mode 100644 index 0000000..654fb28 --- /dev/null +++ b/SOURCES/glibc-upstream-2.34-219.patch @@ -0,0 +1,338 @@ +commit dd6d3a0bbcc67cb2b50b0add0c599f9f99491d8b +Author: Noah Goldstein +Date: Wed Mar 23 16:57:18 2022 -0500 + + x86: Code cleanup in strchr-evex and comment justifying branch + + Small code cleanup for size: -81 bytes. + + Add comment justifying using a branch to do NULL/non-null return. + + All string/memory tests pass and no regressions in benchtests. + + geometric_mean(N=20) of all benchmarks New / Original: .985 + Reviewed-by: H.J. Lu + + (cherry picked from commit ec285ea90415458225623ddc0492ae3f705af043) + +diff --git a/sysdeps/x86_64/multiarch/strchr-evex.S b/sysdeps/x86_64/multiarch/strchr-evex.S +index 7f9d4ee48ddaa998..0b49e0ac54e7b0dd 100644 +--- a/sysdeps/x86_64/multiarch/strchr-evex.S ++++ b/sysdeps/x86_64/multiarch/strchr-evex.S +@@ -30,6 +30,7 @@ + # ifdef USE_AS_WCSCHR + # define VPBROADCAST vpbroadcastd + # define VPCMP vpcmpd ++# define VPTESTN vptestnmd + # define VPMINU vpminud + # define CHAR_REG esi + # define SHIFT_REG ecx +@@ -37,6 +38,7 @@ + # else + # define VPBROADCAST vpbroadcastb + # define VPCMP vpcmpb ++# define VPTESTN vptestnmb + # define VPMINU vpminub + # define CHAR_REG sil + # define SHIFT_REG edx +@@ -61,13 +63,11 @@ + # define CHAR_PER_VEC (VEC_SIZE / CHAR_SIZE) + + .section .text.evex,"ax",@progbits +-ENTRY (STRCHR) ++ENTRY_P2ALIGN (STRCHR, 5) + /* Broadcast CHAR to YMM0. 
*/ + VPBROADCAST %esi, %YMM0 + movl %edi, %eax + andl $(PAGE_SIZE - 1), %eax +- vpxorq %XMMZERO, %XMMZERO, %XMMZERO +- + /* Check if we cross page boundary with one vector load. + Otherwise it is safe to use an unaligned load. */ + cmpl $(PAGE_SIZE - VEC_SIZE), %eax +@@ -81,49 +81,35 @@ ENTRY (STRCHR) + vpxorq %YMM1, %YMM0, %YMM2 + VPMINU %YMM2, %YMM1, %YMM2 + /* Each bit in K0 represents a CHAR or a null byte in YMM1. */ +- VPCMP $0, %YMMZERO, %YMM2, %k0 ++ VPTESTN %YMM2, %YMM2, %k0 + kmovd %k0, %eax + testl %eax, %eax + jz L(aligned_more) + tzcntl %eax, %eax ++# ifndef USE_AS_STRCHRNUL ++ /* Found CHAR or the null byte. */ ++ cmp (%rdi, %rax, CHAR_SIZE), %CHAR_REG ++ /* NB: Use a branch instead of cmovcc here. The expectation is ++ that with strchr the user will branch based on input being ++ null. Since this branch will be 100% predictive of the user ++ branch a branch miss here should save what otherwise would ++ be branch miss in the user code. Otherwise using a branch 1) ++ saves code size and 2) is faster in highly predictable ++ environments. */ ++ jne L(zero) ++# endif + # ifdef USE_AS_WCSCHR + /* NB: Multiply wchar_t count by 4 to get the number of bytes. + */ + leaq (%rdi, %rax, CHAR_SIZE), %rax + # else + addq %rdi, %rax +-# endif +-# ifndef USE_AS_STRCHRNUL +- /* Found CHAR or the null byte. */ +- cmp (%rax), %CHAR_REG +- jne L(zero) + # endif + ret + +- /* .p2align 5 helps keep performance more consistent if ENTRY() +- alignment % 32 was either 16 or 0. As well this makes the +- alignment % 32 of the loop_4x_vec fixed which makes tuning it +- easier. */ +- .p2align 5 +-L(first_vec_x3): +- tzcntl %eax, %eax +-# ifndef USE_AS_STRCHRNUL +- /* Found CHAR or the null byte. */ +- cmp (VEC_SIZE * 3)(%rdi, %rax, CHAR_SIZE), %CHAR_REG +- jne L(zero) +-# endif +- /* NB: Multiply sizeof char type (1 or 4) to get the number of +- bytes. */ +- leaq (VEC_SIZE * 3)(%rdi, %rax, CHAR_SIZE), %rax +- ret + +-# ifndef USE_AS_STRCHRNUL +-L(zero): +- xorl %eax, %eax +- ret +-# endif + +- .p2align 4 ++ .p2align 4,, 10 + L(first_vec_x4): + # ifndef USE_AS_STRCHRNUL + /* Check to see if first match was CHAR (k0) or null (k1). */ +@@ -144,9 +130,18 @@ L(first_vec_x4): + leaq (VEC_SIZE * 4)(%rdi, %rax, CHAR_SIZE), %rax + ret + ++# ifndef USE_AS_STRCHRNUL ++L(zero): ++ xorl %eax, %eax ++ ret ++# endif ++ ++ + .p2align 4 + L(first_vec_x1): +- tzcntl %eax, %eax ++ /* Use bsf here to save 1-byte keeping keeping the block in 1x ++ fetch block. eax guranteed non-zero. */ ++ bsfl %eax, %eax + # ifndef USE_AS_STRCHRNUL + /* Found CHAR or the null byte. */ + cmp (VEC_SIZE)(%rdi, %rax, CHAR_SIZE), %CHAR_REG +@@ -158,7 +153,7 @@ L(first_vec_x1): + leaq (VEC_SIZE)(%rdi, %rax, CHAR_SIZE), %rax + ret + +- .p2align 4 ++ .p2align 4,, 10 + L(first_vec_x2): + # ifndef USE_AS_STRCHRNUL + /* Check to see if first match was CHAR (k0) or null (k1). */ +@@ -179,6 +174,21 @@ L(first_vec_x2): + leaq (VEC_SIZE * 2)(%rdi, %rax, CHAR_SIZE), %rax + ret + ++ .p2align 4,, 10 ++L(first_vec_x3): ++ /* Use bsf here to save 1-byte keeping keeping the block in 1x ++ fetch block. eax guranteed non-zero. */ ++ bsfl %eax, %eax ++# ifndef USE_AS_STRCHRNUL ++ /* Found CHAR or the null byte. */ ++ cmp (VEC_SIZE * 3)(%rdi, %rax, CHAR_SIZE), %CHAR_REG ++ jne L(zero) ++# endif ++ /* NB: Multiply sizeof char type (1 or 4) to get the number of ++ bytes. */ ++ leaq (VEC_SIZE * 3)(%rdi, %rax, CHAR_SIZE), %rax ++ ret ++ + .p2align 4 + L(aligned_more): + /* Align data to VEC_SIZE. 
*/ +@@ -195,7 +205,7 @@ L(cross_page_continue): + vpxorq %YMM1, %YMM0, %YMM2 + VPMINU %YMM2, %YMM1, %YMM2 + /* Each bit in K0 represents a CHAR or a null byte in YMM1. */ +- VPCMP $0, %YMMZERO, %YMM2, %k0 ++ VPTESTN %YMM2, %YMM2, %k0 + kmovd %k0, %eax + testl %eax, %eax + jnz L(first_vec_x1) +@@ -206,7 +216,7 @@ L(cross_page_continue): + /* Each bit in K0 represents a CHAR in YMM1. */ + VPCMP $0, %YMM1, %YMM0, %k0 + /* Each bit in K1 represents a CHAR in YMM1. */ +- VPCMP $0, %YMM1, %YMMZERO, %k1 ++ VPTESTN %YMM1, %YMM1, %k1 + kortestd %k0, %k1 + jnz L(first_vec_x2) + +@@ -215,7 +225,7 @@ L(cross_page_continue): + vpxorq %YMM1, %YMM0, %YMM2 + VPMINU %YMM2, %YMM1, %YMM2 + /* Each bit in K0 represents a CHAR or a null byte in YMM1. */ +- VPCMP $0, %YMMZERO, %YMM2, %k0 ++ VPTESTN %YMM2, %YMM2, %k0 + kmovd %k0, %eax + testl %eax, %eax + jnz L(first_vec_x3) +@@ -224,7 +234,7 @@ L(cross_page_continue): + /* Each bit in K0 represents a CHAR in YMM1. */ + VPCMP $0, %YMM1, %YMM0, %k0 + /* Each bit in K1 represents a CHAR in YMM1. */ +- VPCMP $0, %YMM1, %YMMZERO, %k1 ++ VPTESTN %YMM1, %YMM1, %k1 + kortestd %k0, %k1 + jnz L(first_vec_x4) + +@@ -265,33 +275,33 @@ L(loop_4x_vec): + VPMINU %YMM3, %YMM4, %YMM4 + VPMINU %YMM2, %YMM4, %YMM4{%k4}{z} + +- VPCMP $0, %YMMZERO, %YMM4, %k1 ++ VPTESTN %YMM4, %YMM4, %k1 + kmovd %k1, %ecx + subq $-(VEC_SIZE * 4), %rdi + testl %ecx, %ecx + jz L(loop_4x_vec) + +- VPCMP $0, %YMMZERO, %YMM1, %k0 ++ VPTESTN %YMM1, %YMM1, %k0 + kmovd %k0, %eax + testl %eax, %eax + jnz L(last_vec_x1) + +- VPCMP $0, %YMMZERO, %YMM2, %k0 ++ VPTESTN %YMM2, %YMM2, %k0 + kmovd %k0, %eax + testl %eax, %eax + jnz L(last_vec_x2) + +- VPCMP $0, %YMMZERO, %YMM3, %k0 ++ VPTESTN %YMM3, %YMM3, %k0 + kmovd %k0, %eax + /* Combine YMM3 matches (eax) with YMM4 matches (ecx). */ + # ifdef USE_AS_WCSCHR + sall $8, %ecx + orl %ecx, %eax +- tzcntl %eax, %eax ++ bsfl %eax, %eax + # else + salq $32, %rcx + orq %rcx, %rax +- tzcntq %rax, %rax ++ bsfq %rax, %rax + # endif + # ifndef USE_AS_STRCHRNUL + /* Check if match was CHAR or null. */ +@@ -303,28 +313,28 @@ L(loop_4x_vec): + leaq (VEC_SIZE * 2)(%rdi, %rax, CHAR_SIZE), %rax + ret + +-# ifndef USE_AS_STRCHRNUL +-L(zero_end): +- xorl %eax, %eax +- ret ++ .p2align 4,, 8 ++L(last_vec_x1): ++ bsfl %eax, %eax ++# ifdef USE_AS_WCSCHR ++ /* NB: Multiply wchar_t count by 4 to get the number of bytes. ++ */ ++ leaq (%rdi, %rax, CHAR_SIZE), %rax ++# else ++ addq %rdi, %rax + # endif + +- .p2align 4 +-L(last_vec_x1): +- tzcntl %eax, %eax + # ifndef USE_AS_STRCHRNUL + /* Check if match was null. */ +- cmp (%rdi, %rax, CHAR_SIZE), %CHAR_REG ++ cmp (%rax), %CHAR_REG + jne L(zero_end) + # endif +- /* NB: Multiply sizeof char type (1 or 4) to get the number of +- bytes. */ +- leaq (%rdi, %rax, CHAR_SIZE), %rax ++ + ret + +- .p2align 4 ++ .p2align 4,, 8 + L(last_vec_x2): +- tzcntl %eax, %eax ++ bsfl %eax, %eax + # ifndef USE_AS_STRCHRNUL + /* Check if match was null. */ + cmp (VEC_SIZE)(%rdi, %rax, CHAR_SIZE), %CHAR_REG +@@ -336,7 +346,7 @@ L(last_vec_x2): + ret + + /* Cold case for crossing page with first load. */ +- .p2align 4 ++ .p2align 4,, 8 + L(cross_page_boundary): + movq %rdi, %rdx + /* Align rdi. */ +@@ -346,9 +356,9 @@ L(cross_page_boundary): + vpxorq %YMM1, %YMM0, %YMM2 + VPMINU %YMM2, %YMM1, %YMM2 + /* Each bit in K0 represents a CHAR or a null byte in YMM1. */ +- VPCMP $0, %YMMZERO, %YMM2, %k0 ++ VPTESTN %YMM2, %YMM2, %k0 + kmovd %k0, %eax +- /* Remove the leading bits. */ ++ /* Remove the leading bits. 
*/ + # ifdef USE_AS_WCSCHR + movl %edx, %SHIFT_REG + /* NB: Divide shift count by 4 since each bit in K1 represent 4 +@@ -360,20 +370,24 @@ L(cross_page_boundary): + /* If eax is zero continue. */ + testl %eax, %eax + jz L(cross_page_continue) +- tzcntl %eax, %eax +-# ifndef USE_AS_STRCHRNUL +- /* Check to see if match was CHAR or null. */ +- cmp (%rdx, %rax, CHAR_SIZE), %CHAR_REG +- jne L(zero_end) +-# endif ++ bsfl %eax, %eax ++ + # ifdef USE_AS_WCSCHR + /* NB: Multiply wchar_t count by 4 to get the number of + bytes. */ + leaq (%rdx, %rax, CHAR_SIZE), %rax + # else + addq %rdx, %rax ++# endif ++# ifndef USE_AS_STRCHRNUL ++ /* Check to see if match was CHAR or null. */ ++ cmp (%rax), %CHAR_REG ++ je L(cross_page_ret) ++L(zero_end): ++ xorl %eax, %eax ++L(cross_page_ret): + # endif + ret + + END (STRCHR) +-# endif ++#endif diff --git a/SOURCES/glibc-upstream-2.34-220.patch b/SOURCES/glibc-upstream-2.34-220.patch new file mode 100644 index 0000000..5f77e5c --- /dev/null +++ b/SOURCES/glibc-upstream-2.34-220.patch @@ -0,0 +1,143 @@ +commit 0ae1006967eef11909fbed0f6ecef2f260b133d3 +Author: Noah Goldstein +Date: Wed Mar 23 16:57:22 2022 -0500 + + x86: Optimize strcspn and strpbrk in strcspn-c.c + + Use _mm_cmpeq_epi8 and _mm_movemask_epi8 to get strlen instead of + _mm_cmpistri. Also change offset to unsigned to avoid unnecessary + sign extensions. + + geometric_mean(N=20) of all benchmarks that dont fallback on + sse2/strlen; New / Original: .928 + + All string/memory tests pass. + Reviewed-by: H.J. Lu + + (cherry picked from commit 30d627d477d7255345a4b713cf352ac32d644d61) + +diff --git a/sysdeps/x86_64/multiarch/strcspn-c.c b/sysdeps/x86_64/multiarch/strcspn-c.c +index c56ddbd22f014653..2436b6dcd90d8efe 100644 +--- a/sysdeps/x86_64/multiarch/strcspn-c.c ++++ b/sysdeps/x86_64/multiarch/strcspn-c.c +@@ -85,83 +85,74 @@ STRCSPN_SSE42 (const char *s, const char *a) + RETURN (NULL, strlen (s)); + + const char *aligned; +- __m128i mask; +- int offset = (int) ((size_t) a & 15); ++ __m128i mask, maskz, zero; ++ unsigned int maskz_bits; ++ unsigned int offset = (unsigned int) ((size_t) a & 15); ++ zero = _mm_set1_epi8 (0); + if (offset != 0) + { + /* Load masks. */ + aligned = (const char *) ((size_t) a & -16L); + __m128i mask0 = _mm_load_si128 ((__m128i *) aligned); +- +- mask = __m128i_shift_right (mask0, offset); ++ maskz = _mm_cmpeq_epi8 (mask0, zero); + + /* Find where the NULL terminator is. */ +- int length = _mm_cmpistri (mask, mask, 0x3a); +- if (length == 16 - offset) +- { +- /* There is no NULL terminator. */ +- __m128i mask1 = _mm_load_si128 ((__m128i *) (aligned + 16)); +- int index = _mm_cmpistri (mask1, mask1, 0x3a); +- length += index; +- +- /* Don't use SSE4.2 if the length of A > 16. */ +- if (length > 16) +- return STRCSPN_SSE2 (s, a); +- +- if (index != 0) +- { +- /* Combine mask0 and mask1. We could play games with +- palignr, but frankly this data should be in L1 now +- so do the merge via an unaligned load. */ +- mask = _mm_loadu_si128 ((__m128i *) a); +- } +- } ++ maskz_bits = _mm_movemask_epi8 (maskz) >> offset; ++ if (maskz_bits != 0) ++ { ++ mask = __m128i_shift_right (mask0, offset); ++ offset = (unsigned int) ((size_t) s & 15); ++ if (offset) ++ goto start_unaligned; ++ ++ aligned = s; ++ goto start_loop; ++ } + } +- else +- { +- /* A is aligned. */ +- mask = _mm_load_si128 ((__m128i *) a); + +- /* Find where the NULL terminator is. */ +- int length = _mm_cmpistri (mask, mask, 0x3a); +- if (length == 16) +- { +- /* There is no NULL terminator. 
Don't use SSE4.2 if the length +- of A > 16. */ +- if (a[16] != 0) +- return STRCSPN_SSE2 (s, a); +- } ++ /* A is aligned. */ ++ mask = _mm_loadu_si128 ((__m128i *) a); ++ /* Find where the NULL terminator is. */ ++ maskz = _mm_cmpeq_epi8 (mask, zero); ++ maskz_bits = _mm_movemask_epi8 (maskz); ++ if (maskz_bits == 0) ++ { ++ /* There is no NULL terminator. Don't use SSE4.2 if the length ++ of A > 16. */ ++ if (a[16] != 0) ++ return STRCSPN_SSE2 (s, a); + } + +- offset = (int) ((size_t) s & 15); ++ aligned = s; ++ offset = (unsigned int) ((size_t) s & 15); + if (offset != 0) + { ++ start_unaligned: + /* Check partial string. */ + aligned = (const char *) ((size_t) s & -16L); + __m128i value = _mm_load_si128 ((__m128i *) aligned); + + value = __m128i_shift_right (value, offset); + +- int length = _mm_cmpistri (mask, value, 0x2); ++ unsigned int length = _mm_cmpistri (mask, value, 0x2); + /* No need to check ZFlag since ZFlag is always 1. */ +- int cflag = _mm_cmpistrc (mask, value, 0x2); ++ unsigned int cflag = _mm_cmpistrc (mask, value, 0x2); + if (cflag) + RETURN ((char *) (s + length), length); + /* Find where the NULL terminator is. */ +- int index = _mm_cmpistri (value, value, 0x3a); ++ unsigned int index = _mm_cmpistri (value, value, 0x3a); + if (index < 16 - offset) + RETURN (NULL, index); + aligned += 16; + } +- else +- aligned = s; + ++start_loop: + while (1) + { + __m128i value = _mm_load_si128 ((__m128i *) aligned); +- int index = _mm_cmpistri (mask, value, 0x2); +- int cflag = _mm_cmpistrc (mask, value, 0x2); +- int zflag = _mm_cmpistrz (mask, value, 0x2); ++ unsigned int index = _mm_cmpistri (mask, value, 0x2); ++ unsigned int cflag = _mm_cmpistrc (mask, value, 0x2); ++ unsigned int zflag = _mm_cmpistrz (mask, value, 0x2); + if (cflag) + RETURN ((char *) (aligned + index), (size_t) (aligned + index - s)); + if (zflag) diff --git a/SOURCES/glibc-upstream-2.34-221.patch b/SOURCES/glibc-upstream-2.34-221.patch new file mode 100644 index 0000000..c4b411b --- /dev/null +++ b/SOURCES/glibc-upstream-2.34-221.patch @@ -0,0 +1,143 @@ +commit 0a2da0111037b1cc214f8f40ca5bdebf36f35cbd +Author: Noah Goldstein +Date: Wed Mar 23 16:57:24 2022 -0500 + + x86: Optimize strspn in strspn-c.c + + Use _mm_cmpeq_epi8 and _mm_movemask_epi8 to get strlen instead of + _mm_cmpistri. Also change offset to unsigned to avoid unnecessary + sign extensions. + + geometric_mean(N=20) of all benchmarks that dont fallback on + sse2; New / Original: .901 + + All string/memory tests pass. + Reviewed-by: H.J. Lu + + (cherry picked from commit 412d10343168b05b8cf6c3683457cf9711d28046) + +diff --git a/sysdeps/x86_64/multiarch/strspn-c.c b/sysdeps/x86_64/multiarch/strspn-c.c +index a17196296b9ebe52..3bcc479f1b52ff6a 100644 +--- a/sysdeps/x86_64/multiarch/strspn-c.c ++++ b/sysdeps/x86_64/multiarch/strspn-c.c +@@ -63,81 +63,73 @@ __strspn_sse42 (const char *s, const char *a) + return 0; + + const char *aligned; +- __m128i mask; +- int offset = (int) ((size_t) a & 15); ++ __m128i mask, maskz, zero; ++ unsigned int maskz_bits; ++ unsigned int offset = (int) ((size_t) a & 15); ++ zero = _mm_set1_epi8 (0); + if (offset != 0) + { + /* Load masks. */ + aligned = (const char *) ((size_t) a & -16L); + __m128i mask0 = _mm_load_si128 ((__m128i *) aligned); +- +- mask = __m128i_shift_right (mask0, offset); ++ maskz = _mm_cmpeq_epi8 (mask0, zero); + + /* Find where the NULL terminator is. */ +- int length = _mm_cmpistri (mask, mask, 0x3a); +- if (length == 16 - offset) +- { +- /* There is no NULL terminator. 
*/ +- __m128i mask1 = _mm_load_si128 ((__m128i *) (aligned + 16)); +- int index = _mm_cmpistri (mask1, mask1, 0x3a); +- length += index; +- +- /* Don't use SSE4.2 if the length of A > 16. */ +- if (length > 16) +- return __strspn_sse2 (s, a); +- +- if (index != 0) +- { +- /* Combine mask0 and mask1. We could play games with +- palignr, but frankly this data should be in L1 now +- so do the merge via an unaligned load. */ +- mask = _mm_loadu_si128 ((__m128i *) a); +- } +- } ++ maskz_bits = _mm_movemask_epi8 (maskz) >> offset; ++ if (maskz_bits != 0) ++ { ++ mask = __m128i_shift_right (mask0, offset); ++ offset = (unsigned int) ((size_t) s & 15); ++ if (offset) ++ goto start_unaligned; ++ ++ aligned = s; ++ goto start_loop; ++ } + } +- else +- { +- /* A is aligned. */ +- mask = _mm_load_si128 ((__m128i *) a); + +- /* Find where the NULL terminator is. */ +- int length = _mm_cmpistri (mask, mask, 0x3a); +- if (length == 16) +- { +- /* There is no NULL terminator. Don't use SSE4.2 if the length +- of A > 16. */ +- if (a[16] != 0) +- return __strspn_sse2 (s, a); +- } ++ /* A is aligned. */ ++ mask = _mm_loadu_si128 ((__m128i *) a); ++ ++ /* Find where the NULL terminator is. */ ++ maskz = _mm_cmpeq_epi8 (mask, zero); ++ maskz_bits = _mm_movemask_epi8 (maskz); ++ if (maskz_bits == 0) ++ { ++ /* There is no NULL terminator. Don't use SSE4.2 if the length ++ of A > 16. */ ++ if (a[16] != 0) ++ return __strspn_sse2 (s, a); + } ++ aligned = s; ++ offset = (unsigned int) ((size_t) s & 15); + +- offset = (int) ((size_t) s & 15); + if (offset != 0) + { ++ start_unaligned: + /* Check partial string. */ + aligned = (const char *) ((size_t) s & -16L); + __m128i value = _mm_load_si128 ((__m128i *) aligned); ++ __m128i adj_value = __m128i_shift_right (value, offset); + +- value = __m128i_shift_right (value, offset); +- +- int length = _mm_cmpistri (mask, value, 0x12); ++ unsigned int length = _mm_cmpistri (mask, adj_value, 0x12); + /* No need to check CFlag since it is always 1. */ + if (length < 16 - offset) + return length; + /* Find where the NULL terminator is. */ +- int index = _mm_cmpistri (value, value, 0x3a); +- if (index < 16 - offset) ++ maskz = _mm_cmpeq_epi8 (value, zero); ++ maskz_bits = _mm_movemask_epi8 (maskz) >> offset; ++ if (maskz_bits != 0) + return length; + aligned += 16; + } +- else +- aligned = s; + ++start_loop: + while (1) + { + __m128i value = _mm_load_si128 ((__m128i *) aligned); +- int index = _mm_cmpistri (mask, value, 0x12); +- int cflag = _mm_cmpistrc (mask, value, 0x12); ++ unsigned int index = _mm_cmpistri (mask, value, 0x12); ++ unsigned int cflag = _mm_cmpistrc (mask, value, 0x12); + if (cflag) + return (size_t) (aligned + index - s); + aligned += 16; diff --git a/SOURCES/glibc-upstream-2.34-222.patch b/SOURCES/glibc-upstream-2.34-222.patch new file mode 100644 index 0000000..4b54799 --- /dev/null +++ b/SOURCES/glibc-upstream-2.34-222.patch @@ -0,0 +1,164 @@ +commit 0dafa75e3c42994d0f23db62651d1802577272f2 +Author: Noah Goldstein +Date: Wed Mar 23 16:57:26 2022 -0500 + + x86: Remove strcspn-sse2.S and use the generic implementation + + The generic implementation is faster. + + geometric_mean(N=20) of all benchmarks New / Original: .678 + + All string/memory tests pass. + Reviewed-by: H.J. 
Lu + + (cherry picked from commit fe28e7d9d9535ebab4081d195c553b4fbf39d9ae) + +diff --git a/sysdeps/x86_64/multiarch/strcspn-sse2.S b/sysdeps/x86_64/multiarch/strcspn-sse2.c +similarity index 89% +rename from sysdeps/x86_64/multiarch/strcspn-sse2.S +rename to sysdeps/x86_64/multiarch/strcspn-sse2.c +index 63b260a9ed265230..9bd3dac82d90b3a5 100644 +--- a/sysdeps/x86_64/multiarch/strcspn-sse2.S ++++ b/sysdeps/x86_64/multiarch/strcspn-sse2.c +@@ -19,10 +19,10 @@ + #if IS_IN (libc) + + # include +-# define strcspn __strcspn_sse2 ++# define STRCSPN __strcspn_sse2 + + # undef libc_hidden_builtin_def +-# define libc_hidden_builtin_def(strcspn) ++# define libc_hidden_builtin_def(STRCSPN) + #endif + +-#include ++#include +diff --git a/sysdeps/x86_64/strcspn.S b/sysdeps/x86_64/strcspn.S +deleted file mode 100644 +index 6035a274c87bafb0..0000000000000000 +--- a/sysdeps/x86_64/strcspn.S ++++ /dev/null +@@ -1,122 +0,0 @@ +-/* strcspn (str, ss) -- Return the length of the initial segment of STR +- which contains no characters from SS. +- For AMD x86-64. +- Copyright (C) 1994-2021 Free Software Foundation, Inc. +- This file is part of the GNU C Library. +- Contributed by Ulrich Drepper . +- Bug fixes by Alan Modra . +- Adopted for x86-64 by Andreas Jaeger . +- +- The GNU C Library is free software; you can redistribute it and/or +- modify it under the terms of the GNU Lesser General Public +- License as published by the Free Software Foundation; either +- version 2.1 of the License, or (at your option) any later version. +- +- The GNU C Library is distributed in the hope that it will be useful, +- but WITHOUT ANY WARRANTY; without even the implied warranty of +- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +- Lesser General Public License for more details. +- +- You should have received a copy of the GNU Lesser General Public +- License along with the GNU C Library; if not, see +- . */ +- +-#include +-#include "asm-syntax.h" +- +- .text +-ENTRY (strcspn) +- +- movq %rdi, %rdx /* Save SRC. */ +- +- /* First we create a table with flags for all possible characters. +- For the ASCII (7bit/8bit) or ISO-8859-X character sets which are +- supported by the C string functions we have 256 characters. +- Before inserting marks for the stop characters we clear the whole +- table. */ +- movq %rdi, %r8 /* Save value. */ +- subq $256, %rsp /* Make space for 256 bytes. */ +- cfi_adjust_cfa_offset(256) +- movl $32, %ecx /* 32*8 bytes = 256 bytes. */ +- movq %rsp, %rdi +- xorl %eax, %eax /* We store 0s. */ +- cld +- rep +- stosq +- +- movq %rsi, %rax /* Setup skipset. */ +- +-/* For understanding the following code remember that %rcx == 0 now. +- Although all the following instruction only modify %cl we always +- have a correct zero-extended 64-bit value in %rcx. */ +- +- .p2align 4 +-L(2): movb (%rax), %cl /* get byte from skipset */ +- testb %cl, %cl /* is NUL char? */ +- jz L(1) /* yes => start compare loop */ +- movb %cl, (%rsp,%rcx) /* set corresponding byte in skipset table */ +- +- movb 1(%rax), %cl /* get byte from skipset */ +- testb $0xff, %cl /* is NUL char? */ +- jz L(1) /* yes => start compare loop */ +- movb %cl, (%rsp,%rcx) /* set corresponding byte in skipset table */ +- +- movb 2(%rax), %cl /* get byte from skipset */ +- testb $0xff, %cl /* is NUL char? 
*/ +- jz L(1) /* yes => start compare loop */ +- movb %cl, (%rsp,%rcx) /* set corresponding byte in skipset table */ +- +- movb 3(%rax), %cl /* get byte from skipset */ +- addq $4, %rax /* increment skipset pointer */ +- movb %cl, (%rsp,%rcx) /* set corresponding byte in skipset table */ +- testb $0xff, %cl /* is NUL char? */ +- jnz L(2) /* no => process next dword from skipset */ +- +-L(1): leaq -4(%rdx), %rax /* prepare loop */ +- +- /* We use a neat trick for the following loop. Normally we would +- have to test for two termination conditions +- 1. a character in the skipset was found +- and +- 2. the end of the string was found +- But as a sign that the character is in the skipset we store its +- value in the table. But the value of NUL is NUL so the loop +- terminates for NUL in every case. */ +- +- .p2align 4 +-L(3): addq $4, %rax /* adjust pointer for full loop round */ +- +- movb (%rax), %cl /* get byte from string */ +- cmpb %cl, (%rsp,%rcx) /* is it contained in skipset? */ +- je L(4) /* yes => return */ +- +- movb 1(%rax), %cl /* get byte from string */ +- cmpb %cl, (%rsp,%rcx) /* is it contained in skipset? */ +- je L(5) /* yes => return */ +- +- movb 2(%rax), %cl /* get byte from string */ +- cmpb %cl, (%rsp,%rcx) /* is it contained in skipset? */ +- jz L(6) /* yes => return */ +- +- movb 3(%rax), %cl /* get byte from string */ +- cmpb %cl, (%rsp,%rcx) /* is it contained in skipset? */ +- jne L(3) /* no => start loop again */ +- +- incq %rax /* adjust pointer */ +-L(6): incq %rax +-L(5): incq %rax +- +-L(4): addq $256, %rsp /* remove skipset */ +- cfi_adjust_cfa_offset(-256) +-#ifdef USE_AS_STRPBRK +- xorl %edx,%edx +- orb %cl, %cl /* was last character NUL? */ +- cmovzq %rdx, %rax /* Yes: return NULL */ +-#else +- subq %rdx, %rax /* we have to return the number of valid +- characters, so compute distance to first +- non-valid character */ +-#endif +- ret +-END (strcspn) +-libc_hidden_builtin_def (strcspn) diff --git a/SOURCES/glibc-upstream-2.34-223.patch b/SOURCES/glibc-upstream-2.34-223.patch new file mode 100644 index 0000000..42accca --- /dev/null +++ b/SOURCES/glibc-upstream-2.34-223.patch @@ -0,0 +1,44 @@ +commit 38115446558e6d0976299eb592ba7266681c27d5 +Author: Noah Goldstein +Date: Wed Mar 23 16:57:27 2022 -0500 + + x86: Remove strpbrk-sse2.S and use the generic implementation + + The generic implementation is faster (see strcspn commit). + + All string/memory tests pass. + Reviewed-by: H.J. 
Lu + + (cherry picked from commit 653358535280a599382cb6c77538a187dac6a87f) + +diff --git a/sysdeps/x86_64/multiarch/strpbrk-sse2.S b/sysdeps/x86_64/multiarch/strpbrk-sse2.c +similarity index 87% +rename from sysdeps/x86_64/multiarch/strpbrk-sse2.S +rename to sysdeps/x86_64/multiarch/strpbrk-sse2.c +index c5b95d08ff09cb27..8a58f051c35163dd 100644 +--- a/sysdeps/x86_64/multiarch/strpbrk-sse2.S ++++ b/sysdeps/x86_64/multiarch/strpbrk-sse2.c +@@ -19,11 +19,10 @@ + #if IS_IN (libc) + + # include +-# define strcspn __strpbrk_sse2 ++# define STRPBRK __strpbrk_sse2 + + # undef libc_hidden_builtin_def +-# define libc_hidden_builtin_def(strpbrk) ++# define libc_hidden_builtin_def(STRPBRK) + #endif + +-#define USE_AS_STRPBRK +-#include ++#include +diff --git a/sysdeps/x86_64/strpbrk.S b/sysdeps/x86_64/strpbrk.S +deleted file mode 100644 +index 21888a5b923974f9..0000000000000000 +--- a/sysdeps/x86_64/strpbrk.S ++++ /dev/null +@@ -1,3 +0,0 @@ +-#define strcspn strpbrk +-#define USE_AS_STRPBRK +-#include diff --git a/SOURCES/glibc-upstream-2.34-224.patch b/SOURCES/glibc-upstream-2.34-224.patch new file mode 100644 index 0000000..764dd6d --- /dev/null +++ b/SOURCES/glibc-upstream-2.34-224.patch @@ -0,0 +1,157 @@ +commit a4b1cae068d4d6e3117dd49e7d0599e4c62ac39f +Author: Noah Goldstein +Date: Wed Mar 23 16:57:29 2022 -0500 + + x86: Remove strspn-sse2.S and use the generic implementation + + The generic implementation is faster. + + geometric_mean(N=20) of all benchmarks New / Original: .710 + + All string/memory tests pass. + Reviewed-by: H.J. Lu + + (cherry picked from commit 9c8a6ad620b49a27120ecdd7049c26bf05900397) + +diff --git a/sysdeps/x86_64/multiarch/strspn-sse2.S b/sysdeps/x86_64/multiarch/strspn-sse2.c +similarity index 89% +rename from sysdeps/x86_64/multiarch/strspn-sse2.S +rename to sysdeps/x86_64/multiarch/strspn-sse2.c +index e919fe492cc15151..f5e5686db1037740 100644 +--- a/sysdeps/x86_64/multiarch/strspn-sse2.S ++++ b/sysdeps/x86_64/multiarch/strspn-sse2.c +@@ -19,10 +19,10 @@ + #if IS_IN (libc) + + # include +-# define strspn __strspn_sse2 ++# define STRSPN __strspn_sse2 + + # undef libc_hidden_builtin_def +-# define libc_hidden_builtin_def(strspn) ++# define libc_hidden_builtin_def(STRSPN) + #endif + +-#include ++#include +diff --git a/sysdeps/x86_64/strspn.S b/sysdeps/x86_64/strspn.S +deleted file mode 100644 +index e878f328852792db..0000000000000000 +--- a/sysdeps/x86_64/strspn.S ++++ /dev/null +@@ -1,115 +0,0 @@ +-/* strspn (str, ss) -- Return the length of the initial segment of STR +- which contains only characters from SS. +- For AMD x86-64. +- Copyright (C) 1994-2021 Free Software Foundation, Inc. +- This file is part of the GNU C Library. +- Contributed by Ulrich Drepper . +- Bug fixes by Alan Modra . +- Adopted for x86-64 by Andreas Jaeger . +- +- The GNU C Library is free software; you can redistribute it and/or +- modify it under the terms of the GNU Lesser General Public +- License as published by the Free Software Foundation; either +- version 2.1 of the License, or (at your option) any later version. +- +- The GNU C Library is distributed in the hope that it will be useful, +- but WITHOUT ANY WARRANTY; without even the implied warranty of +- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +- Lesser General Public License for more details. +- +- You should have received a copy of the GNU Lesser General Public +- License along with the GNU C Library; if not, see +- . */ +- +-#include +- +- .text +-ENTRY (strspn) +- +- movq %rdi, %rdx /* Save SRC. 
*/ +- +- /* First we create a table with flags for all possible characters. +- For the ASCII (7bit/8bit) or ISO-8859-X character sets which are +- supported by the C string functions we have 256 characters. +- Before inserting marks for the stop characters we clear the whole +- table. */ +- movq %rdi, %r8 /* Save value. */ +- subq $256, %rsp /* Make space for 256 bytes. */ +- cfi_adjust_cfa_offset(256) +- movl $32, %ecx /* 32*8 bytes = 256 bytes. */ +- movq %rsp, %rdi +- xorl %eax, %eax /* We store 0s. */ +- cld +- rep +- stosq +- +- movq %rsi, %rax /* Setup stopset. */ +- +-/* For understanding the following code remember that %rcx == 0 now. +- Although all the following instruction only modify %cl we always +- have a correct zero-extended 64-bit value in %rcx. */ +- +- .p2align 4 +-L(2): movb (%rax), %cl /* get byte from stopset */ +- testb %cl, %cl /* is NUL char? */ +- jz L(1) /* yes => start compare loop */ +- movb %cl, (%rsp,%rcx) /* set corresponding byte in stopset table */ +- +- movb 1(%rax), %cl /* get byte from stopset */ +- testb $0xff, %cl /* is NUL char? */ +- jz L(1) /* yes => start compare loop */ +- movb %cl, (%rsp,%rcx) /* set corresponding byte in stopset table */ +- +- movb 2(%rax), %cl /* get byte from stopset */ +- testb $0xff, %cl /* is NUL char? */ +- jz L(1) /* yes => start compare loop */ +- movb %cl, (%rsp,%rcx) /* set corresponding byte in stopset table */ +- +- movb 3(%rax), %cl /* get byte from stopset */ +- addq $4, %rax /* increment stopset pointer */ +- movb %cl, (%rsp,%rcx) /* set corresponding byte in stopset table */ +- testb $0xff, %cl /* is NUL char? */ +- jnz L(2) /* no => process next dword from stopset */ +- +-L(1): leaq -4(%rdx), %rax /* prepare loop */ +- +- /* We use a neat trick for the following loop. Normally we would +- have to test for two termination conditions +- 1. a character in the stopset was found +- and +- 2. the end of the string was found +- But as a sign that the character is in the stopset we store its +- value in the table. But the value of NUL is NUL so the loop +- terminates for NUL in every case. */ +- +- .p2align 4 +-L(3): addq $4, %rax /* adjust pointer for full loop round */ +- +- movb (%rax), %cl /* get byte from string */ +- testb %cl, (%rsp,%rcx) /* is it contained in skipset? */ +- jz L(4) /* no => return */ +- +- movb 1(%rax), %cl /* get byte from string */ +- testb %cl, (%rsp,%rcx) /* is it contained in skipset? */ +- jz L(5) /* no => return */ +- +- movb 2(%rax), %cl /* get byte from string */ +- testb %cl, (%rsp,%rcx) /* is it contained in skipset? */ +- jz L(6) /* no => return */ +- +- movb 3(%rax), %cl /* get byte from string */ +- testb %cl, (%rsp,%rcx) /* is it contained in skipset? */ +- jnz L(3) /* yes => start loop again */ +- +- incq %rax /* adjust pointer */ +-L(6): incq %rax +-L(5): incq %rax +- +-L(4): addq $256, %rsp /* remove stopset */ +- cfi_adjust_cfa_offset(-256) +- subq %rdx, %rax /* we have to return the number of valid +- characters, so compute distance to first +- non-valid character */ +- ret +-END (strspn) +-libc_hidden_builtin_def (strspn) diff --git a/SOURCES/glibc-upstream-2.34-225.patch b/SOURCES/glibc-upstream-2.34-225.patch new file mode 100644 index 0000000..61ccb20 --- /dev/null +++ b/SOURCES/glibc-upstream-2.34-225.patch @@ -0,0 +1,118 @@ +commit 5997011826b7bbb7015f56bf143a6e4fd0f5a7df +Author: Noah Goldstein +Date: Wed Mar 23 16:57:36 2022 -0500 + + x86: Optimize str{n}casecmp TOLOWER logic in strcmp.S + + Slightly faster method of doing TOLOWER that saves an + instruction. 
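
As a rough scalar model of the change (not part of the patch; the helper name
is illustrative), the new sequence folds the uppercase test into a single
wrap-around add: adding the lcase_min byte 0x3f moves 'A'..'Z' (0x41..0x5a)
into the signed range 0x80..0x99, so one signed compare against lcase_max
(0x99) flags every non-uppercase byte, and the vpandn/vpaddb pair then adds
case_add (0x20) to the uppercase lanes only:

    #include <stdint.h>

    /* Per-byte model of the new TOLOWER; each step mirrors one
       instruction of the vector macro introduced in the hunks below.  */
    static uint8_t tolower_model (uint8_t c)
    {
      uint8_t t = (uint8_t) (c + 0x3f);      /* vpaddb: 'A'..'Z' -> 0x80..0x99  */
      uint8_t not_upper                      /* vpcmpgtb (signed compare)       */
        = ((int8_t) t > (int8_t) 0x99) ? 0xff : 0x00;
      return c + (uint8_t) (~not_upper & 0x20);  /* vpandn + vpaddb             */
    }

The replaced sequence needed two signed compares per input register to bracket
'A'..'Z'; the wrap-around add derives the same mask from one compare, which is
where the saved instruction comes from.
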
+ + Also replace the hard coded 5-byte no with .p2align 4. On builds with + CET enabled this misaligned entry to strcasecmp. + + geometric_mean(N=40) of all benchmarks New / Original: .894 + + All string/memory tests pass. + Reviewed-by: H.J. Lu + + (cherry picked from commit 670b54bc585ea4a94f3b2e9272ba44aa6b730b73) + +diff --git a/sysdeps/x86_64/strcmp.S b/sysdeps/x86_64/strcmp.S +index 7f8a1bc756f86aee..ca70b540eb2dd190 100644 +--- a/sysdeps/x86_64/strcmp.S ++++ b/sysdeps/x86_64/strcmp.S +@@ -78,9 +78,8 @@ ENTRY2 (__strcasecmp) + movq __libc_tsd_LOCALE@gottpoff(%rip),%rax + mov %fs:(%rax),%RDX_LP + +- // XXX 5 byte should be before the function +- /* 5-byte NOP. */ +- .byte 0x0f,0x1f,0x44,0x00,0x00 ++ /* Either 1 or 5 bytes (dependeing if CET is enabled). */ ++ .p2align 4 + END2 (__strcasecmp) + # ifndef NO_NOLOCALE_ALIAS + weak_alias (__strcasecmp, strcasecmp) +@@ -97,9 +96,8 @@ ENTRY2 (__strncasecmp) + movq __libc_tsd_LOCALE@gottpoff(%rip),%rax + mov %fs:(%rax),%RCX_LP + +- // XXX 5 byte should be before the function +- /* 5-byte NOP. */ +- .byte 0x0f,0x1f,0x44,0x00,0x00 ++ /* Either 1 or 5 bytes (dependeing if CET is enabled). */ ++ .p2align 4 + END2 (__strncasecmp) + # ifndef NO_NOLOCALE_ALIAS + weak_alias (__strncasecmp, strncasecmp) +@@ -149,22 +147,22 @@ ENTRY (STRCMP) + #if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L + .section .rodata.cst16,"aM",@progbits,16 + .align 16 +-.Lbelowupper: +- .quad 0x4040404040404040 +- .quad 0x4040404040404040 +-.Ltopupper: +- .quad 0x5b5b5b5b5b5b5b5b +- .quad 0x5b5b5b5b5b5b5b5b +-.Ltouppermask: ++.Llcase_min: ++ .quad 0x3f3f3f3f3f3f3f3f ++ .quad 0x3f3f3f3f3f3f3f3f ++.Llcase_max: ++ .quad 0x9999999999999999 ++ .quad 0x9999999999999999 ++.Lcase_add: + .quad 0x2020202020202020 + .quad 0x2020202020202020 + .previous +- movdqa .Lbelowupper(%rip), %xmm5 +-# define UCLOW_reg %xmm5 +- movdqa .Ltopupper(%rip), %xmm6 +-# define UCHIGH_reg %xmm6 +- movdqa .Ltouppermask(%rip), %xmm7 +-# define LCQWORD_reg %xmm7 ++ movdqa .Llcase_min(%rip), %xmm5 ++# define LCASE_MIN_reg %xmm5 ++ movdqa .Llcase_max(%rip), %xmm6 ++# define LCASE_MAX_reg %xmm6 ++ movdqa .Lcase_add(%rip), %xmm7 ++# define CASE_ADD_reg %xmm7 + #endif + cmp $0x30, %ecx + ja LABEL(crosscache) /* rsi: 16-byte load will cross cache line */ +@@ -175,22 +173,18 @@ ENTRY (STRCMP) + movhpd 8(%rdi), %xmm1 + movhpd 8(%rsi), %xmm2 + #if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L +-# define TOLOWER(reg1, reg2) \ +- movdqa reg1, %xmm8; \ +- movdqa UCHIGH_reg, %xmm9; \ +- movdqa reg2, %xmm10; \ +- movdqa UCHIGH_reg, %xmm11; \ +- pcmpgtb UCLOW_reg, %xmm8; \ +- pcmpgtb reg1, %xmm9; \ +- pcmpgtb UCLOW_reg, %xmm10; \ +- pcmpgtb reg2, %xmm11; \ +- pand %xmm9, %xmm8; \ +- pand %xmm11, %xmm10; \ +- pand LCQWORD_reg, %xmm8; \ +- pand LCQWORD_reg, %xmm10; \ +- por %xmm8, reg1; \ +- por %xmm10, reg2 +- TOLOWER (%xmm1, %xmm2) ++# define TOLOWER(reg1, reg2) \ ++ movdqa LCASE_MIN_reg, %xmm8; \ ++ movdqa LCASE_MIN_reg, %xmm9; \ ++ paddb reg1, %xmm8; \ ++ paddb reg2, %xmm9; \ ++ pcmpgtb LCASE_MAX_reg, %xmm8; \ ++ pcmpgtb LCASE_MAX_reg, %xmm9; \ ++ pandn CASE_ADD_reg, %xmm8; \ ++ pandn CASE_ADD_reg, %xmm9; \ ++ paddb %xmm8, reg1; \ ++ paddb %xmm9, reg2 ++ TOLOWER (%xmm1, %xmm2) + #else + # define TOLOWER(reg1, reg2) + #endif diff --git a/SOURCES/glibc-upstream-2.34-226.patch b/SOURCES/glibc-upstream-2.34-226.patch new file mode 100644 index 0000000..fcadc66 --- /dev/null +++ b/SOURCES/glibc-upstream-2.34-226.patch @@ -0,0 +1,139 @@ +commit 3605c744078bb048d876298aaf12a2869e8071b8 +Author: Noah 
Goldstein +Date: Wed Mar 23 16:57:38 2022 -0500 + + x86: Optimize str{n}casecmp TOLOWER logic in strcmp-sse42.S + + Slightly faster method of doing TOLOWER that saves an + instruction. + + Also replace the hard coded 5-byte no with .p2align 4. On builds with + CET enabled this misaligned entry to strcasecmp. + + geometric_mean(N=40) of all benchmarks New / Original: .920 + + All string/memory tests pass. + Reviewed-by: H.J. Lu + + (cherry picked from commit d154758e618ec9324f5d339c46db0aa27e8b1226) + +diff --git a/sysdeps/x86_64/multiarch/strcmp-sse42.S b/sysdeps/x86_64/multiarch/strcmp-sse42.S +index 6197a723b9e0606e..a6825de8195ad8c6 100644 +--- a/sysdeps/x86_64/multiarch/strcmp-sse42.S ++++ b/sysdeps/x86_64/multiarch/strcmp-sse42.S +@@ -89,9 +89,8 @@ ENTRY (GLABEL(__strcasecmp)) + movq __libc_tsd_LOCALE@gottpoff(%rip),%rax + mov %fs:(%rax),%RDX_LP + +- // XXX 5 byte should be before the function +- /* 5-byte NOP. */ +- .byte 0x0f,0x1f,0x44,0x00,0x00 ++ /* Either 1 or 5 bytes (dependeing if CET is enabled). */ ++ .p2align 4 + END (GLABEL(__strcasecmp)) + /* FALLTHROUGH to strcasecmp_l. */ + #endif +@@ -100,9 +99,8 @@ ENTRY (GLABEL(__strncasecmp)) + movq __libc_tsd_LOCALE@gottpoff(%rip),%rax + mov %fs:(%rax),%RCX_LP + +- // XXX 5 byte should be before the function +- /* 5-byte NOP. */ +- .byte 0x0f,0x1f,0x44,0x00,0x00 ++ /* Either 1 or 5 bytes (dependeing if CET is enabled). */ ++ .p2align 4 + END (GLABEL(__strncasecmp)) + /* FALLTHROUGH to strncasecmp_l. */ + #endif +@@ -170,27 +168,22 @@ STRCMP_SSE42: + #if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L + .section .rodata.cst16,"aM",@progbits,16 + .align 16 +-LABEL(belowupper): +- .quad 0x4040404040404040 +- .quad 0x4040404040404040 +-LABEL(topupper): +-# ifdef USE_AVX +- .quad 0x5a5a5a5a5a5a5a5a +- .quad 0x5a5a5a5a5a5a5a5a +-# else +- .quad 0x5b5b5b5b5b5b5b5b +- .quad 0x5b5b5b5b5b5b5b5b +-# endif +-LABEL(touppermask): ++LABEL(lcase_min): ++ .quad 0x3f3f3f3f3f3f3f3f ++ .quad 0x3f3f3f3f3f3f3f3f ++LABEL(lcase_max): ++ .quad 0x9999999999999999 ++ .quad 0x9999999999999999 ++LABEL(case_add): + .quad 0x2020202020202020 + .quad 0x2020202020202020 + .previous +- movdqa LABEL(belowupper)(%rip), %xmm4 +-# define UCLOW_reg %xmm4 +- movdqa LABEL(topupper)(%rip), %xmm5 +-# define UCHIGH_reg %xmm5 +- movdqa LABEL(touppermask)(%rip), %xmm6 +-# define LCQWORD_reg %xmm6 ++ movdqa LABEL(lcase_min)(%rip), %xmm4 ++# define LCASE_MIN_reg %xmm4 ++ movdqa LABEL(lcase_max)(%rip), %xmm5 ++# define LCASE_MAX_reg %xmm5 ++ movdqa LABEL(case_add)(%rip), %xmm6 ++# define CASE_ADD_reg %xmm6 + #endif + cmp $0x30, %ecx + ja LABEL(crosscache)/* rsi: 16-byte load will cross cache line */ +@@ -201,32 +194,26 @@ LABEL(touppermask): + #if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L + # ifdef USE_AVX + # define TOLOWER(reg1, reg2) \ +- vpcmpgtb UCLOW_reg, reg1, %xmm7; \ +- vpcmpgtb UCHIGH_reg, reg1, %xmm8; \ +- vpcmpgtb UCLOW_reg, reg2, %xmm9; \ +- vpcmpgtb UCHIGH_reg, reg2, %xmm10; \ +- vpandn %xmm7, %xmm8, %xmm8; \ +- vpandn %xmm9, %xmm10, %xmm10; \ +- vpand LCQWORD_reg, %xmm8, %xmm8; \ +- vpand LCQWORD_reg, %xmm10, %xmm10; \ +- vpor reg1, %xmm8, reg1; \ +- vpor reg2, %xmm10, reg2 ++ vpaddb LCASE_MIN_reg, reg1, %xmm7; \ ++ vpaddb LCASE_MIN_reg, reg2, %xmm8; \ ++ vpcmpgtb LCASE_MAX_reg, %xmm7, %xmm7; \ ++ vpcmpgtb LCASE_MAX_reg, %xmm8, %xmm8; \ ++ vpandn CASE_ADD_reg, %xmm7, %xmm7; \ ++ vpandn CASE_ADD_reg, %xmm8, %xmm8; \ ++ vpaddb %xmm7, reg1, reg1; \ ++ vpaddb %xmm8, reg2, reg2 + # else + # define TOLOWER(reg1, reg2) \ +- movdqa reg1, %xmm7; 
\ +- movdqa UCHIGH_reg, %xmm8; \ +- movdqa reg2, %xmm9; \ +- movdqa UCHIGH_reg, %xmm10; \ +- pcmpgtb UCLOW_reg, %xmm7; \ +- pcmpgtb reg1, %xmm8; \ +- pcmpgtb UCLOW_reg, %xmm9; \ +- pcmpgtb reg2, %xmm10; \ +- pand %xmm8, %xmm7; \ +- pand %xmm10, %xmm9; \ +- pand LCQWORD_reg, %xmm7; \ +- pand LCQWORD_reg, %xmm9; \ +- por %xmm7, reg1; \ +- por %xmm9, reg2 ++ movdqa LCASE_MIN_reg, %xmm7; \ ++ movdqa LCASE_MIN_reg, %xmm8; \ ++ paddb reg1, %xmm7; \ ++ paddb reg2, %xmm8; \ ++ pcmpgtb LCASE_MAX_reg, %xmm7; \ ++ pcmpgtb LCASE_MAX_reg, %xmm8; \ ++ pandn CASE_ADD_reg, %xmm7; \ ++ pandn CASE_ADD_reg, %xmm8; \ ++ paddb %xmm7, reg1; \ ++ paddb %xmm8, reg2 + # endif + TOLOWER (%xmm1, %xmm2) + #else diff --git a/SOURCES/glibc-upstream-2.34-227.patch b/SOURCES/glibc-upstream-2.34-227.patch new file mode 100644 index 0000000..9dd23aa --- /dev/null +++ b/SOURCES/glibc-upstream-2.34-227.patch @@ -0,0 +1,744 @@ +commit 3051cf3e745015a9106cf71be7f7adbb2f83fcac +Author: Noah Goldstein +Date: Thu Mar 24 18:56:12 2022 -0500 + + x86: Add AVX2 optimized str{n}casecmp + + geometric_mean(N=40) of all benchmarks AVX2 / SSE42: .702 + + All string/memory tests pass. + Reviewed-by: H.J. Lu + + (cherry picked from commit bbf81222343fed5cd704001a2ae0d86c71544151) + +diff --git a/sysdeps/x86_64/multiarch/Makefile b/sysdeps/x86_64/multiarch/Makefile +index 8c9e7812c6af10b8..711ecf2ee45d61b9 100644 +--- a/sysdeps/x86_64/multiarch/Makefile ++++ b/sysdeps/x86_64/multiarch/Makefile +@@ -51,6 +51,8 @@ sysdep_routines += \ + stpncpy-sse2-unaligned \ + stpncpy-ssse3 \ + strcasecmp_l-avx \ ++ strcasecmp_l-avx2 \ ++ strcasecmp_l-avx2-rtm \ + strcasecmp_l-sse2 \ + strcasecmp_l-sse4_2 \ + strcasecmp_l-ssse3 \ +@@ -89,6 +91,8 @@ sysdep_routines += \ + strlen-evex \ + strlen-sse2 \ + strncase_l-avx \ ++ strncase_l-avx2 \ ++ strncase_l-avx2-rtm \ + strncase_l-sse2 \ + strncase_l-sse4_2 \ + strncase_l-ssse3 \ +diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c +index 4992d7bd3206a7c0..a687b387c91aa9ae 100644 +--- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c ++++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c +@@ -418,6 +418,13 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, + + /* Support sysdeps/x86_64/multiarch/strcasecmp_l.c. */ + IFUNC_IMPL (i, name, strcasecmp, ++ IFUNC_IMPL_ADD (array, i, strcasecmp, ++ CPU_FEATURE_USABLE (AVX2), ++ __strcasecmp_avx2) ++ IFUNC_IMPL_ADD (array, i, strcasecmp, ++ (CPU_FEATURE_USABLE (AVX2) ++ && CPU_FEATURE_USABLE (RTM)), ++ __strcasecmp_avx2_rtm) + IFUNC_IMPL_ADD (array, i, strcasecmp, + CPU_FEATURE_USABLE (AVX), + __strcasecmp_avx) +@@ -431,6 +438,13 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, + + /* Support sysdeps/x86_64/multiarch/strcasecmp_l.c. */ + IFUNC_IMPL (i, name, strcasecmp_l, ++ IFUNC_IMPL_ADD (array, i, strcasecmp, ++ CPU_FEATURE_USABLE (AVX2), ++ __strcasecmp_l_avx2) ++ IFUNC_IMPL_ADD (array, i, strcasecmp, ++ (CPU_FEATURE_USABLE (AVX2) ++ && CPU_FEATURE_USABLE (RTM)), ++ __strcasecmp_l_avx2_rtm) + IFUNC_IMPL_ADD (array, i, strcasecmp_l, + CPU_FEATURE_USABLE (AVX), + __strcasecmp_l_avx) +@@ -558,6 +572,13 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, + + /* Support sysdeps/x86_64/multiarch/strncase_l.c. 
*/ + IFUNC_IMPL (i, name, strncasecmp, ++ IFUNC_IMPL_ADD (array, i, strncasecmp, ++ CPU_FEATURE_USABLE (AVX2), ++ __strncasecmp_avx2) ++ IFUNC_IMPL_ADD (array, i, strncasecmp, ++ (CPU_FEATURE_USABLE (AVX2) ++ && CPU_FEATURE_USABLE (RTM)), ++ __strncasecmp_avx2_rtm) + IFUNC_IMPL_ADD (array, i, strncasecmp, + CPU_FEATURE_USABLE (AVX), + __strncasecmp_avx) +@@ -572,6 +593,13 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, + + /* Support sysdeps/x86_64/multiarch/strncase_l.c. */ + IFUNC_IMPL (i, name, strncasecmp_l, ++ IFUNC_IMPL_ADD (array, i, strncasecmp, ++ CPU_FEATURE_USABLE (AVX2), ++ __strncasecmp_l_avx2) ++ IFUNC_IMPL_ADD (array, i, strncasecmp, ++ (CPU_FEATURE_USABLE (AVX2) ++ && CPU_FEATURE_USABLE (RTM)), ++ __strncasecmp_l_avx2_rtm) + IFUNC_IMPL_ADD (array, i, strncasecmp_l, + CPU_FEATURE_USABLE (AVX), + __strncasecmp_l_avx) +diff --git a/sysdeps/x86_64/multiarch/ifunc-strcasecmp.h b/sysdeps/x86_64/multiarch/ifunc-strcasecmp.h +index 931770e079fcc69f..64d0cd6ef25f73c0 100644 +--- a/sysdeps/x86_64/multiarch/ifunc-strcasecmp.h ++++ b/sysdeps/x86_64/multiarch/ifunc-strcasecmp.h +@@ -23,12 +23,24 @@ extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden; + extern __typeof (REDIRECT_NAME) OPTIMIZE (ssse3) attribute_hidden; + extern __typeof (REDIRECT_NAME) OPTIMIZE (sse42) attribute_hidden; + extern __typeof (REDIRECT_NAME) OPTIMIZE (avx) attribute_hidden; ++extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden; ++extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_rtm) attribute_hidden; + + static inline void * + IFUNC_SELECTOR (void) + { + const struct cpu_features* cpu_features = __get_cpu_features (); + ++ if (CPU_FEATURE_USABLE_P (cpu_features, AVX2) ++ && CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load)) ++ { ++ if (CPU_FEATURE_USABLE_P (cpu_features, RTM)) ++ return OPTIMIZE (avx2_rtm); ++ ++ if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER)) ++ return OPTIMIZE (avx2); ++ } ++ + if (CPU_FEATURE_USABLE_P (cpu_features, AVX)) + return OPTIMIZE (avx); + +diff --git a/sysdeps/x86_64/multiarch/strcasecmp_l-avx2-rtm.S b/sysdeps/x86_64/multiarch/strcasecmp_l-avx2-rtm.S +new file mode 100644 +index 0000000000000000..09957fc3c543b40c +--- /dev/null ++++ b/sysdeps/x86_64/multiarch/strcasecmp_l-avx2-rtm.S +@@ -0,0 +1,15 @@ ++#ifndef STRCMP ++# define STRCMP __strcasecmp_l_avx2_rtm ++#endif ++ ++#define _GLABEL(x) x ## _rtm ++#define GLABEL(x) _GLABEL(x) ++ ++#define ZERO_UPPER_VEC_REGISTERS_RETURN \ ++ ZERO_UPPER_VEC_REGISTERS_RETURN_XTEST ++ ++#define VZEROUPPER_RETURN jmp L(return_vzeroupper) ++ ++#define SECTION(p) p##.avx.rtm ++ ++#include "strcasecmp_l-avx2.S" +diff --git a/sysdeps/x86_64/multiarch/strcasecmp_l-avx2.S b/sysdeps/x86_64/multiarch/strcasecmp_l-avx2.S +new file mode 100644 +index 0000000000000000..e2762f2a222b2a65 +--- /dev/null ++++ b/sysdeps/x86_64/multiarch/strcasecmp_l-avx2.S +@@ -0,0 +1,23 @@ ++/* strcasecmp_l optimized with AVX2. ++ Copyright (C) 2017-2022 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#ifndef STRCMP ++# define STRCMP __strcasecmp_l_avx2 ++#endif ++#define USE_AS_STRCASECMP_L ++#include "strcmp-avx2.S" +diff --git a/sysdeps/x86_64/multiarch/strcmp-avx2.S b/sysdeps/x86_64/multiarch/strcmp-avx2.S +index 09a73942086f9c9f..aa91f6e48a0e1ce5 100644 +--- a/sysdeps/x86_64/multiarch/strcmp-avx2.S ++++ b/sysdeps/x86_64/multiarch/strcmp-avx2.S +@@ -20,6 +20,10 @@ + + # include + ++# if defined USE_AS_STRCASECMP_L ++# include "locale-defines.h" ++# endif ++ + # ifndef STRCMP + # define STRCMP __strcmp_avx2 + # endif +@@ -74,13 +78,88 @@ + # define VEC_OFFSET (-VEC_SIZE) + # endif + ++# ifdef USE_AS_STRCASECMP_L ++# define BYTE_LOOP_REG OFFSET_REG ++# else ++# define BYTE_LOOP_REG ecx ++# endif ++ ++# ifdef USE_AS_STRCASECMP_L ++# ifdef USE_AS_STRNCMP ++# define STRCASECMP __strncasecmp_avx2 ++# define LOCALE_REG rcx ++# define LOCALE_REG_LP RCX_LP ++# define STRCASECMP_NONASCII __strncasecmp_l_nonascii ++# else ++# define STRCASECMP __strcasecmp_avx2 ++# define LOCALE_REG rdx ++# define LOCALE_REG_LP RDX_LP ++# define STRCASECMP_NONASCII __strcasecmp_l_nonascii ++# endif ++# endif ++ + # define xmmZERO xmm15 + # define ymmZERO ymm15 + ++# define LCASE_MIN_ymm %ymm10 ++# define LCASE_MAX_ymm %ymm11 ++# define CASE_ADD_ymm %ymm12 ++ ++# define LCASE_MIN_xmm %xmm10 ++# define LCASE_MAX_xmm %xmm11 ++# define CASE_ADD_xmm %xmm12 ++ ++ /* r11 is never use elsewhere so this is safe to maintain. */ ++# define TOLOWER_BASE %r11 ++ + # ifndef SECTION + # define SECTION(p) p##.avx + # endif + ++# ifdef USE_AS_STRCASECMP_L ++# define REG(x, y) x ## y ++# define TOLOWER(reg1_in, reg1_out, reg2_in, reg2_out, ext) \ ++ vpaddb REG(LCASE_MIN_, ext), reg1_in, REG(%ext, 8); \ ++ vpaddb REG(LCASE_MIN_, ext), reg2_in, REG(%ext, 9); \ ++ vpcmpgtb REG(LCASE_MAX_, ext), REG(%ext, 8), REG(%ext, 8); \ ++ vpcmpgtb REG(LCASE_MAX_, ext), REG(%ext, 9), REG(%ext, 9); \ ++ vpandn REG(CASE_ADD_, ext), REG(%ext, 8), REG(%ext, 8); \ ++ vpandn REG(CASE_ADD_, ext), REG(%ext, 9), REG(%ext, 9); \ ++ vpaddb REG(%ext, 8), reg1_in, reg1_out; \ ++ vpaddb REG(%ext, 9), reg2_in, reg2_out ++ ++# define TOLOWER_gpr(src, dst) movl (TOLOWER_BASE, src, 4), dst ++# define TOLOWER_ymm(...) TOLOWER(__VA_ARGS__, ymm) ++# define TOLOWER_xmm(...) TOLOWER(__VA_ARGS__, xmm) ++ ++# define CMP_R1_R2(s1_reg, s2_reg, scratch_reg, reg_out, ext) \ ++ TOLOWER (s1_reg, scratch_reg, s2_reg, s2_reg, ext); \ ++ VPCMPEQ scratch_reg, s2_reg, reg_out ++ ++# define CMP_R1_S2(s1_reg, s2_mem, scratch_reg, reg_out, ext) \ ++ VMOVU s2_mem, reg_out; \ ++ CMP_R1_R2(s1_reg, reg_out, scratch_reg, reg_out, ext) ++ ++# define CMP_R1_R2_ymm(...) CMP_R1_R2(__VA_ARGS__, ymm) ++# define CMP_R1_R2_xmm(...) CMP_R1_R2(__VA_ARGS__, xmm) ++ ++# define CMP_R1_S2_ymm(...) CMP_R1_S2(__VA_ARGS__, ymm) ++# define CMP_R1_S2_xmm(...) CMP_R1_S2(__VA_ARGS__, xmm) ++ ++# else ++# define TOLOWER_gpr(...) ++# define TOLOWER_ymm(...) ++# define TOLOWER_xmm(...) ++ ++# define CMP_R1_R2_ymm(s1_reg, s2_reg, scratch_reg, reg_out) \ ++ VPCMPEQ s2_reg, s1_reg, reg_out ++ ++# define CMP_R1_R2_xmm(...) CMP_R1_R2_ymm(__VA_ARGS__) ++ ++# define CMP_R1_S2_ymm(...) CMP_R1_R2_ymm(__VA_ARGS__) ++# define CMP_R1_S2_xmm(...) CMP_R1_R2_xmm(__VA_ARGS__) ++# endif ++ + /* Warning! + wcscmp/wcsncmp have to use SIGNED comparison for elements. + strcmp/strncmp have to use UNSIGNED comparison for elements. 
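
The TOLOWER and CMP_R1_S2 macros added in the hunk above are the heart of this
port: every vector compare on the case-insensitive path first normalizes both
inputs to lowercase, then reuses the existing equality test. A minimal
intrinsics sketch of that dataflow (AVX2 assumed; the function names are
illustrative and do not exist in glibc):

    #include <immintrin.h>

    /* Mirror of TOLOWER_ymm: one wrap-around add, one signed compare,
       one andnot, one add -- all lane-wise across 32 bytes.  */
    static __m256i tolower_ymm (__m256i v)
    {
      const __m256i lcase_min = _mm256_set1_epi8 (0x3f);
      const __m256i lcase_max = _mm256_set1_epi8 ((char) 0x99);
      const __m256i case_add  = _mm256_set1_epi8 (0x20);
      __m256i t   = _mm256_add_epi8 (v, lcase_min);       /* vpaddb    */
      __m256i gt  = _mm256_cmpgt_epi8 (t, lcase_max);     /* vpcmpgtb  */
      __m256i add = _mm256_andnot_si256 (gt, case_add);   /* vpandn    */
      return _mm256_add_epi8 (v, add);                    /* vpaddb    */
    }

    /* Mirror of CMP_R1_S2_ymm: load s2, lower-case both, compare equal.  */
    static __m256i cmp_r1_s2_ymm (__m256i s1, const void *s2_mem)
    {
      __m256i s2 = _mm256_loadu_si256 ((const __m256i *) s2_mem);
      return _mm256_cmpeq_epi8 (tolower_ymm (s1), tolower_ymm (s2));
    }

One detail the sketch glosses over: the real macro lowers s1 into a scratch
register so the original bytes stay live for the null-terminator check
(VPCMPEQ against the zero register) that follows each compare.
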
+@@ -102,8 +181,49 @@ + returned. */ + + .section SECTION(.text), "ax", @progbits +-ENTRY(STRCMP) ++ .align 16 ++ .type STRCMP, @function ++ .globl STRCMP ++ .hidden STRCMP ++ ++# ifndef GLABEL ++# define GLABEL(...) __VA_ARGS__ ++# endif ++ ++# ifdef USE_AS_STRCASECMP_L ++ENTRY (GLABEL(STRCASECMP)) ++ movq __libc_tsd_LOCALE@gottpoff(%rip), %rax ++ mov %fs:(%rax), %LOCALE_REG_LP ++ ++ /* Either 1 or 5 bytes (dependeing if CET is enabled). */ ++ .p2align 4 ++END (GLABEL(STRCASECMP)) ++ /* FALLTHROUGH to strcasecmp/strncasecmp_l. */ ++# endif ++ ++ .p2align 4 ++STRCMP: ++ cfi_startproc ++ _CET_ENDBR ++ CALL_MCOUNT ++ ++# if defined USE_AS_STRCASECMP_L ++ /* We have to fall back on the C implementation for locales with ++ encodings not matching ASCII for single bytes. */ ++# if LOCALE_T___LOCALES != 0 || LC_CTYPE != 0 ++ mov LOCALE_T___LOCALES + LC_CTYPE * LP_SIZE(%LOCALE_REG), %RAX_LP ++# else ++ mov (%LOCALE_REG), %RAX_LP ++# endif ++ testl $1, LOCALE_DATA_VALUES + _NL_CTYPE_NONASCII_CASE * SIZEOF_VALUES(%rax) ++ jne STRCASECMP_NONASCII ++ leaq _nl_C_LC_CTYPE_tolower + 128 * 4(%rip), TOLOWER_BASE ++# endif ++ + # ifdef USE_AS_STRNCMP ++ /* Don't overwrite LOCALE_REG (rcx) until we have pass ++ L(one_or_less). Otherwise we might use the wrong locale in ++ the OVERFLOW_STRCMP (strcasecmp_l). */ + # ifdef __ILP32__ + /* Clear the upper 32 bits. */ + movl %edx, %edx +@@ -128,6 +248,30 @@ ENTRY(STRCMP) + # endif + # endif + vpxor %xmmZERO, %xmmZERO, %xmmZERO ++# if defined USE_AS_STRCASECMP_L ++ .section .rodata.cst32, "aM", @progbits, 32 ++ .align 32 ++L(lcase_min): ++ .quad 0x3f3f3f3f3f3f3f3f ++ .quad 0x3f3f3f3f3f3f3f3f ++ .quad 0x3f3f3f3f3f3f3f3f ++ .quad 0x3f3f3f3f3f3f3f3f ++L(lcase_max): ++ .quad 0x9999999999999999 ++ .quad 0x9999999999999999 ++ .quad 0x9999999999999999 ++ .quad 0x9999999999999999 ++L(case_add): ++ .quad 0x2020202020202020 ++ .quad 0x2020202020202020 ++ .quad 0x2020202020202020 ++ .quad 0x2020202020202020 ++ .previous ++ ++ vmovdqa L(lcase_min)(%rip), LCASE_MIN_ymm ++ vmovdqa L(lcase_max)(%rip), LCASE_MAX_ymm ++ vmovdqa L(case_add)(%rip), CASE_ADD_ymm ++# endif + movl %edi, %eax + orl %esi, %eax + sall $20, %eax +@@ -138,8 +282,10 @@ ENTRY(STRCMP) + L(no_page_cross): + /* Safe to compare 4x vectors. */ + VMOVU (%rdi), %ymm0 +- /* 1s where s1 and s2 equal. */ +- VPCMPEQ (%rsi), %ymm0, %ymm1 ++ /* 1s where s1 and s2 equal. Just VPCMPEQ if its not strcasecmp. ++ Otherwise converts ymm0 and load from rsi to lower. ymm2 is ++ scratch and ymm1 is the return. */ ++ CMP_R1_S2_ymm (%ymm0, (%rsi), %ymm2, %ymm1) + /* 1s at null CHAR. */ + VPCMPEQ %ymm0, %ymmZERO, %ymm2 + /* 1s where s1 and s2 equal AND not null CHAR. */ +@@ -172,6 +318,8 @@ L(return_vec_0): + # else + movzbl (%rdi, %rcx), %eax + movzbl (%rsi, %rcx), %ecx ++ TOLOWER_gpr (%rax, %eax) ++ TOLOWER_gpr (%rcx, %ecx) + subl %ecx, %eax + # endif + L(ret0): +@@ -192,6 +340,10 @@ L(ret_zero): + + .p2align 4,, 5 + L(one_or_less): ++# ifdef USE_AS_STRCASECMP_L ++ /* Set locale argument for strcasecmp. 
*/ ++ movq %LOCALE_REG, %rdx ++# endif + jb L(ret_zero) + # ifdef USE_AS_WCSCMP + /* 'nbe' covers the case where length is negative (large +@@ -211,6 +363,8 @@ L(one_or_less): + jnbe __strcmp_avx2 + movzbl (%rdi), %eax + movzbl (%rsi), %ecx ++ TOLOWER_gpr (%rax, %eax) ++ TOLOWER_gpr (%rcx, %ecx) + subl %ecx, %eax + # endif + L(ret1): +@@ -238,6 +392,8 @@ L(return_vec_1): + # else + movzbl VEC_SIZE(%rdi, %rcx), %eax + movzbl VEC_SIZE(%rsi, %rcx), %ecx ++ TOLOWER_gpr (%rax, %eax) ++ TOLOWER_gpr (%rcx, %ecx) + subl %ecx, %eax + # endif + L(ret2): +@@ -269,6 +425,8 @@ L(return_vec_2): + # else + movzbl (VEC_SIZE * 2)(%rdi, %rcx), %eax + movzbl (VEC_SIZE * 2)(%rsi, %rcx), %ecx ++ TOLOWER_gpr (%rax, %eax) ++ TOLOWER_gpr (%rcx, %ecx) + subl %ecx, %eax + # endif + L(ret3): +@@ -289,6 +447,8 @@ L(return_vec_3): + # else + movzbl (VEC_SIZE * 3)(%rdi, %rcx), %eax + movzbl (VEC_SIZE * 3)(%rsi, %rcx), %ecx ++ TOLOWER_gpr (%rax, %eax) ++ TOLOWER_gpr (%rcx, %ecx) + subl %ecx, %eax + # endif + L(ret4): +@@ -299,7 +459,7 @@ L(ret4): + L(more_3x_vec): + /* Safe to compare 4x vectors. */ + VMOVU VEC_SIZE(%rdi), %ymm0 +- VPCMPEQ VEC_SIZE(%rsi), %ymm0, %ymm1 ++ CMP_R1_S2_ymm (%ymm0, VEC_SIZE(%rsi), %ymm2, %ymm1) + VPCMPEQ %ymm0, %ymmZERO, %ymm2 + vpandn %ymm1, %ymm2, %ymm1 + vpmovmskb %ymm1, %ecx +@@ -312,7 +472,7 @@ L(more_3x_vec): + # endif + + VMOVU (VEC_SIZE * 2)(%rdi), %ymm0 +- VPCMPEQ (VEC_SIZE * 2)(%rsi), %ymm0, %ymm1 ++ CMP_R1_S2_ymm (%ymm0, (VEC_SIZE * 2)(%rsi), %ymm2, %ymm1) + VPCMPEQ %ymm0, %ymmZERO, %ymm2 + vpandn %ymm1, %ymm2, %ymm1 + vpmovmskb %ymm1, %ecx +@@ -320,7 +480,7 @@ L(more_3x_vec): + jnz L(return_vec_2) + + VMOVU (VEC_SIZE * 3)(%rdi), %ymm0 +- VPCMPEQ (VEC_SIZE * 3)(%rsi), %ymm0, %ymm1 ++ CMP_R1_S2_ymm (%ymm0, (VEC_SIZE * 3)(%rsi), %ymm2, %ymm1) + VPCMPEQ %ymm0, %ymmZERO, %ymm2 + vpandn %ymm1, %ymm2, %ymm1 + vpmovmskb %ymm1, %ecx +@@ -395,12 +555,10 @@ L(loop_skip_page_cross_check): + VMOVA (VEC_SIZE * 3)(%rdi), %ymm6 + + /* ymm1 all 1s where s1 and s2 equal. All 0s otherwise. */ +- VPCMPEQ (VEC_SIZE * 0)(%rsi), %ymm0, %ymm1 +- +- VPCMPEQ (VEC_SIZE * 1)(%rsi), %ymm2, %ymm3 +- VPCMPEQ (VEC_SIZE * 2)(%rsi), %ymm4, %ymm5 +- VPCMPEQ (VEC_SIZE * 3)(%rsi), %ymm6, %ymm7 +- ++ CMP_R1_S2_ymm (%ymm0, (VEC_SIZE * 0)(%rsi), %ymm3, %ymm1) ++ CMP_R1_S2_ymm (%ymm2, (VEC_SIZE * 1)(%rsi), %ymm5, %ymm3) ++ CMP_R1_S2_ymm (%ymm4, (VEC_SIZE * 2)(%rsi), %ymm7, %ymm5) ++ CMP_R1_S2_ymm (%ymm6, (VEC_SIZE * 3)(%rsi), %ymm13, %ymm7) + + /* If any mismatches or null CHAR then 0 CHAR, otherwise non- + zero. 
*/ +@@ -469,6 +627,8 @@ L(return_vec_2_3_end): + # else + movzbl (VEC_SIZE * 2 - VEC_OFFSET)(%rdi, %LOOP_REG64), %eax + movzbl (VEC_SIZE * 2 - VEC_OFFSET)(%rsi, %LOOP_REG64), %ecx ++ TOLOWER_gpr (%rax, %eax) ++ TOLOWER_gpr (%rcx, %ecx) + subl %ecx, %eax + xorl %r8d, %eax + subl %r8d, %eax +@@ -512,6 +672,8 @@ L(return_vec_0_end): + # else + movzbl (%rdi, %rcx), %eax + movzbl (%rsi, %rcx), %ecx ++ TOLOWER_gpr (%rax, %eax) ++ TOLOWER_gpr (%rcx, %ecx) + subl %ecx, %eax + xorl %r8d, %eax + subl %r8d, %eax +@@ -534,6 +696,8 @@ L(return_vec_1_end): + # else + movzbl VEC_SIZE(%rdi, %rcx), %eax + movzbl VEC_SIZE(%rsi, %rcx), %ecx ++ TOLOWER_gpr (%rax, %eax) ++ TOLOWER_gpr (%rcx, %ecx) + subl %ecx, %eax + xorl %r8d, %eax + subl %r8d, %eax +@@ -560,6 +724,8 @@ L(return_vec_2_end): + # else + movzbl (VEC_SIZE * 2)(%rdi, %rcx), %eax + movzbl (VEC_SIZE * 2)(%rsi, %rcx), %ecx ++ TOLOWER_gpr (%rax, %eax) ++ TOLOWER_gpr (%rcx, %ecx) + subl %ecx, %eax + xorl %r8d, %eax + subl %r8d, %eax +@@ -587,7 +753,7 @@ L(page_cross_during_loop): + jle L(less_1x_vec_till_page_cross) + + VMOVA (%rdi), %ymm0 +- VPCMPEQ (%rsi), %ymm0, %ymm1 ++ CMP_R1_S2_ymm (%ymm0, (%rsi), %ymm2, %ymm1) + VPCMPEQ %ymm0, %ymmZERO, %ymm2 + vpandn %ymm1, %ymm2, %ymm1 + vpmovmskb %ymm1, %ecx +@@ -609,7 +775,7 @@ L(less_1x_vec_till_page_cross): + here, it means the previous page (rdi - VEC_SIZE) has already + been loaded earlier so must be valid. */ + VMOVU -VEC_SIZE(%rdi, %rax), %ymm0 +- VPCMPEQ -VEC_SIZE(%rsi, %rax), %ymm0, %ymm1 ++ CMP_R1_S2_ymm (%ymm0, -VEC_SIZE(%rsi, %rax), %ymm2, %ymm1) + VPCMPEQ %ymm0, %ymmZERO, %ymm2 + vpandn %ymm1, %ymm2, %ymm1 + vpmovmskb %ymm1, %ecx +@@ -651,6 +817,8 @@ L(return_page_cross_cmp_mem): + # else + movzbl VEC_OFFSET(%rdi, %rcx), %eax + movzbl VEC_OFFSET(%rsi, %rcx), %ecx ++ TOLOWER_gpr (%rax, %eax) ++ TOLOWER_gpr (%rcx, %ecx) + subl %ecx, %eax + xorl %r8d, %eax + subl %r8d, %eax +@@ -677,7 +845,7 @@ L(more_2x_vec_till_page_cross): + iteration here. */ + + VMOVU VEC_SIZE(%rdi), %ymm0 +- VPCMPEQ VEC_SIZE(%rsi), %ymm0, %ymm1 ++ CMP_R1_S2_ymm (%ymm0, VEC_SIZE(%rsi), %ymm2, %ymm1) + VPCMPEQ %ymm0, %ymmZERO, %ymm2 + vpandn %ymm1, %ymm2, %ymm1 + vpmovmskb %ymm1, %ecx +@@ -693,7 +861,7 @@ L(more_2x_vec_till_page_cross): + + /* Safe to include comparisons from lower bytes. 
*/ + VMOVU -(VEC_SIZE * 2)(%rdi, %rax), %ymm0 +- VPCMPEQ -(VEC_SIZE * 2)(%rsi, %rax), %ymm0, %ymm1 ++ CMP_R1_S2_ymm (%ymm0, -(VEC_SIZE * 2)(%rsi, %rax), %ymm2, %ymm1) + VPCMPEQ %ymm0, %ymmZERO, %ymm2 + vpandn %ymm1, %ymm2, %ymm1 + vpmovmskb %ymm1, %ecx +@@ -701,7 +869,7 @@ L(more_2x_vec_till_page_cross): + jnz L(return_vec_page_cross_0) + + VMOVU -(VEC_SIZE * 1)(%rdi, %rax), %ymm0 +- VPCMPEQ -(VEC_SIZE * 1)(%rsi, %rax), %ymm0, %ymm1 ++ CMP_R1_S2_ymm (%ymm0, -(VEC_SIZE * 1)(%rsi, %rax), %ymm2, %ymm1) + VPCMPEQ %ymm0, %ymmZERO, %ymm2 + vpandn %ymm1, %ymm2, %ymm1 + vpmovmskb %ymm1, %ecx +@@ -719,8 +887,8 @@ L(more_2x_vec_till_page_cross): + VMOVA (VEC_SIZE * 2)(%rdi), %ymm4 + VMOVA (VEC_SIZE * 3)(%rdi), %ymm6 + +- VPCMPEQ (VEC_SIZE * 2)(%rsi), %ymm4, %ymm5 +- VPCMPEQ (VEC_SIZE * 3)(%rsi), %ymm6, %ymm7 ++ CMP_R1_S2_ymm (%ymm4, (VEC_SIZE * 2)(%rsi), %ymm7, %ymm5) ++ CMP_R1_S2_ymm (%ymm6, (VEC_SIZE * 3)(%rsi), %ymm13, %ymm7) + vpand %ymm4, %ymm5, %ymm5 + vpand %ymm6, %ymm7, %ymm7 + VPMINU %ymm5, %ymm7, %ymm7 +@@ -771,6 +939,8 @@ L(return_vec_page_cross_1): + # else + movzbl VEC_OFFSET(%rdi, %rcx), %eax + movzbl VEC_OFFSET(%rsi, %rcx), %ecx ++ TOLOWER_gpr (%rax, %eax) ++ TOLOWER_gpr (%rcx, %ecx) + subl %ecx, %eax + xorl %r8d, %eax + subl %r8d, %eax +@@ -826,7 +996,7 @@ L(page_cross): + L(page_cross_loop): + + VMOVU (%rdi, %OFFSET_REG64), %ymm0 +- VPCMPEQ (%rsi, %OFFSET_REG64), %ymm0, %ymm1 ++ CMP_R1_S2_ymm (%ymm0, (%rsi, %OFFSET_REG64), %ymm2, %ymm1) + VPCMPEQ %ymm0, %ymmZERO, %ymm2 + vpandn %ymm1, %ymm2, %ymm1 + vpmovmskb %ymm1, %ecx +@@ -844,11 +1014,11 @@ L(page_cross_loop): + subl %eax, %OFFSET_REG + /* OFFSET_REG has distance to page cross - VEC_SIZE. Guranteed + to not cross page so is safe to load. Since we have already +- loaded at least 1 VEC from rsi it is also guranteed to be safe. +- */ ++ loaded at least 1 VEC from rsi it is also guranteed to be ++ safe. 
*/ + + VMOVU (%rdi, %OFFSET_REG64), %ymm0 +- VPCMPEQ (%rsi, %OFFSET_REG64), %ymm0, %ymm1 ++ CMP_R1_S2_ymm (%ymm0, (%rsi, %OFFSET_REG64), %ymm2, %ymm1) + VPCMPEQ %ymm0, %ymmZERO, %ymm2 + vpandn %ymm1, %ymm2, %ymm1 + vpmovmskb %ymm1, %ecx +@@ -881,6 +1051,8 @@ L(ret_vec_page_cross_cont): + # else + movzbl (%rdi, %rcx), %eax + movzbl (%rsi, %rcx), %ecx ++ TOLOWER_gpr (%rax, %eax) ++ TOLOWER_gpr (%rcx, %ecx) + subl %ecx, %eax + xorl %r8d, %eax + subl %r8d, %eax +@@ -934,7 +1106,7 @@ L(less_1x_vec_till_page): + ja L(less_16_till_page) + + VMOVU (%rdi), %xmm0 +- VPCMPEQ (%rsi), %xmm0, %xmm1 ++ CMP_R1_S2_xmm (%xmm0, (%rsi), %xmm2, %xmm1) + VPCMPEQ %xmm0, %xmmZERO, %xmm2 + vpandn %xmm1, %xmm2, %xmm1 + vpmovmskb %ymm1, %ecx +@@ -952,7 +1124,7 @@ L(less_1x_vec_till_page): + # endif + + VMOVU (%rdi, %OFFSET_REG64), %xmm0 +- VPCMPEQ (%rsi, %OFFSET_REG64), %xmm0, %xmm1 ++ CMP_R1_S2_xmm (%xmm0, (%rsi, %OFFSET_REG64), %xmm2, %xmm1) + VPCMPEQ %xmm0, %xmmZERO, %xmm2 + vpandn %xmm1, %xmm2, %xmm1 + vpmovmskb %ymm1, %ecx +@@ -990,7 +1162,7 @@ L(less_16_till_page): + vmovq (%rdi), %xmm0 + vmovq (%rsi), %xmm1 + VPCMPEQ %xmm0, %xmmZERO, %xmm2 +- VPCMPEQ %xmm1, %xmm0, %xmm1 ++ CMP_R1_R2_xmm (%xmm0, %xmm1, %xmm3, %xmm1) + vpandn %xmm1, %xmm2, %xmm1 + vpmovmskb %ymm1, %ecx + incb %cl +@@ -1010,7 +1182,7 @@ L(less_16_till_page): + vmovq (%rdi, %OFFSET_REG64), %xmm0 + vmovq (%rsi, %OFFSET_REG64), %xmm1 + VPCMPEQ %xmm0, %xmmZERO, %xmm2 +- VPCMPEQ %xmm1, %xmm0, %xmm1 ++ CMP_R1_R2_xmm (%xmm0, %xmm1, %xmm3, %xmm1) + vpandn %xmm1, %xmm2, %xmm1 + vpmovmskb %ymm1, %ecx + incb %cl +@@ -1066,7 +1238,7 @@ L(ret_less_8_wcs): + vmovd (%rdi), %xmm0 + vmovd (%rsi), %xmm1 + VPCMPEQ %xmm0, %xmmZERO, %xmm2 +- VPCMPEQ %xmm1, %xmm0, %xmm1 ++ CMP_R1_R2_xmm (%xmm0, %xmm1, %xmm3, %xmm1) + vpandn %xmm1, %xmm2, %xmm1 + vpmovmskb %ymm1, %ecx + subl $0xf, %ecx +@@ -1085,7 +1257,7 @@ L(ret_less_8_wcs): + vmovd (%rdi, %OFFSET_REG64), %xmm0 + vmovd (%rsi, %OFFSET_REG64), %xmm1 + VPCMPEQ %xmm0, %xmmZERO, %xmm2 +- VPCMPEQ %xmm1, %xmm0, %xmm1 ++ CMP_R1_R2_xmm (%xmm0, %xmm1, %xmm3, %xmm1) + vpandn %xmm1, %xmm2, %xmm1 + vpmovmskb %ymm1, %ecx + subl $0xf, %ecx +@@ -1119,7 +1291,9 @@ L(less_4_till_page): + L(less_4_loop): + movzbl (%rdi), %eax + movzbl (%rsi, %rdi), %ecx +- subl %ecx, %eax ++ TOLOWER_gpr (%rax, %eax) ++ TOLOWER_gpr (%rcx, %BYTE_LOOP_REG) ++ subl %BYTE_LOOP_REG, %eax + jnz L(ret_less_4_loop) + testl %ecx, %ecx + jz L(ret_zero_4_loop) +@@ -1146,5 +1320,6 @@ L(ret_less_4_loop): + subl %r8d, %eax + ret + # endif +-END(STRCMP) ++ cfi_endproc ++ .size STRCMP, .-STRCMP + #endif +diff --git a/sysdeps/x86_64/multiarch/strncase_l-avx2-rtm.S b/sysdeps/x86_64/multiarch/strncase_l-avx2-rtm.S +new file mode 100644 +index 0000000000000000..58c05dcfb8643791 +--- /dev/null ++++ b/sysdeps/x86_64/multiarch/strncase_l-avx2-rtm.S +@@ -0,0 +1,16 @@ ++#ifndef STRCMP ++# define STRCMP __strncasecmp_l_avx2_rtm ++#endif ++ ++#define _GLABEL(x) x ## _rtm ++#define GLABEL(x) _GLABEL(x) ++ ++#define ZERO_UPPER_VEC_REGISTERS_RETURN \ ++ ZERO_UPPER_VEC_REGISTERS_RETURN_XTEST ++ ++#define VZEROUPPER_RETURN jmp L(return_vzeroupper) ++ ++#define SECTION(p) p##.avx.rtm ++#define OVERFLOW_STRCMP __strcasecmp_l_avx2_rtm ++ ++#include "strncase_l-avx2.S" +diff --git a/sysdeps/x86_64/multiarch/strncase_l-avx2.S b/sysdeps/x86_64/multiarch/strncase_l-avx2.S +new file mode 100644 +index 0000000000000000..48c0aa21f84ad32c +--- /dev/null ++++ b/sysdeps/x86_64/multiarch/strncase_l-avx2.S +@@ -0,0 +1,27 @@ ++/* strncasecmp_l optimized with AVX2. 
++ Copyright (C) 2017-2022 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#ifndef STRCMP ++# define STRCMP __strncasecmp_l_avx2 ++#endif ++#define USE_AS_STRCASECMP_L ++#define USE_AS_STRNCMP ++#ifndef OVERFLOW_STRCMP ++# define OVERFLOW_STRCMP __strcasecmp_l_avx2 ++#endif ++#include "strcmp-avx2.S" diff --git a/SOURCES/glibc-upstream-2.34-228.patch b/SOURCES/glibc-upstream-2.34-228.patch new file mode 100644 index 0000000..dee6598 --- /dev/null +++ b/SOURCES/glibc-upstream-2.34-228.patch @@ -0,0 +1,803 @@ +commit b13a2e68eb3b84f2a7b587132ec2ea813815febf +Author: Noah Goldstein +Date: Thu Mar 24 18:56:13 2022 -0500 + + x86: Add EVEX optimized str{n}casecmp + + geometric_mean(N=40) of all benchmarks EVEX / SSE42: .621 + + All string/memory tests pass. + Reviewed-by: H.J. Lu + + (cherry picked from commit 84e7c46df4086873eae28a1fb87d2cf5388b1e16) + +diff --git a/sysdeps/x86_64/multiarch/Makefile b/sysdeps/x86_64/multiarch/Makefile +index 711ecf2ee45d61b9..359712c1491a2431 100644 +--- a/sysdeps/x86_64/multiarch/Makefile ++++ b/sysdeps/x86_64/multiarch/Makefile +@@ -53,6 +53,7 @@ sysdep_routines += \ + strcasecmp_l-avx \ + strcasecmp_l-avx2 \ + strcasecmp_l-avx2-rtm \ ++ strcasecmp_l-evex \ + strcasecmp_l-sse2 \ + strcasecmp_l-sse4_2 \ + strcasecmp_l-ssse3 \ +@@ -93,6 +94,7 @@ sysdep_routines += \ + strncase_l-avx \ + strncase_l-avx2 \ + strncase_l-avx2-rtm \ ++ strncase_l-evex \ + strncase_l-sse2 \ + strncase_l-sse4_2 \ + strncase_l-ssse3 \ +diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c +index a687b387c91aa9ae..f6994e5406933d53 100644 +--- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c ++++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c +@@ -418,6 +418,10 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, + + /* Support sysdeps/x86_64/multiarch/strcasecmp_l.c. */ + IFUNC_IMPL (i, name, strcasecmp, ++ IFUNC_IMPL_ADD (array, i, strcasecmp, ++ (CPU_FEATURE_USABLE (AVX512VL) ++ && CPU_FEATURE_USABLE (AVX512BW)), ++ __strcasecmp_evex) + IFUNC_IMPL_ADD (array, i, strcasecmp, + CPU_FEATURE_USABLE (AVX2), + __strcasecmp_avx2) +@@ -438,6 +442,10 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, + + /* Support sysdeps/x86_64/multiarch/strcasecmp_l.c. */ + IFUNC_IMPL (i, name, strcasecmp_l, ++ IFUNC_IMPL_ADD (array, i, strcasecmp, ++ (CPU_FEATURE_USABLE (AVX512VL) ++ && CPU_FEATURE_USABLE (AVX512BW)), ++ __strcasecmp_l_evex) + IFUNC_IMPL_ADD (array, i, strcasecmp, + CPU_FEATURE_USABLE (AVX2), + __strcasecmp_l_avx2) +@@ -572,6 +580,10 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, + + /* Support sysdeps/x86_64/multiarch/strncase_l.c. 
*/ + IFUNC_IMPL (i, name, strncasecmp, ++ IFUNC_IMPL_ADD (array, i, strncasecmp, ++ (CPU_FEATURE_USABLE (AVX512VL) ++ && CPU_FEATURE_USABLE (AVX512BW)), ++ __strncasecmp_evex) + IFUNC_IMPL_ADD (array, i, strncasecmp, + CPU_FEATURE_USABLE (AVX2), + __strncasecmp_avx2) +@@ -593,6 +605,10 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, + + /* Support sysdeps/x86_64/multiarch/strncase_l.c. */ + IFUNC_IMPL (i, name, strncasecmp_l, ++ IFUNC_IMPL_ADD (array, i, strncasecmp, ++ (CPU_FEATURE_USABLE (AVX512VL) ++ && CPU_FEATURE_USABLE (AVX512BW)), ++ __strncasecmp_l_evex) + IFUNC_IMPL_ADD (array, i, strncasecmp, + CPU_FEATURE_USABLE (AVX2), + __strncasecmp_l_avx2) +diff --git a/sysdeps/x86_64/multiarch/ifunc-strcasecmp.h b/sysdeps/x86_64/multiarch/ifunc-strcasecmp.h +index 64d0cd6ef25f73c0..488e99e4997f379b 100644 +--- a/sysdeps/x86_64/multiarch/ifunc-strcasecmp.h ++++ b/sysdeps/x86_64/multiarch/ifunc-strcasecmp.h +@@ -25,6 +25,7 @@ extern __typeof (REDIRECT_NAME) OPTIMIZE (sse42) attribute_hidden; + extern __typeof (REDIRECT_NAME) OPTIMIZE (avx) attribute_hidden; + extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden; + extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_rtm) attribute_hidden; ++extern __typeof (REDIRECT_NAME) OPTIMIZE (evex) attribute_hidden; + + static inline void * + IFUNC_SELECTOR (void) +@@ -34,6 +35,10 @@ IFUNC_SELECTOR (void) + if (CPU_FEATURE_USABLE_P (cpu_features, AVX2) + && CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load)) + { ++ if (CPU_FEATURE_USABLE_P (cpu_features, AVX512VL) ++ && CPU_FEATURE_USABLE_P (cpu_features, AVX512BW)) ++ return OPTIMIZE (evex); ++ + if (CPU_FEATURE_USABLE_P (cpu_features, RTM)) + return OPTIMIZE (avx2_rtm); + +diff --git a/sysdeps/x86_64/multiarch/strcasecmp_l-evex.S b/sysdeps/x86_64/multiarch/strcasecmp_l-evex.S +new file mode 100644 +index 0000000000000000..58642db748e3db71 +--- /dev/null ++++ b/sysdeps/x86_64/multiarch/strcasecmp_l-evex.S +@@ -0,0 +1,23 @@ ++/* strcasecmp_l optimized with EVEX. ++ Copyright (C) 2017-2022 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . 
*/ ++ ++#ifndef STRCMP ++# define STRCMP __strcasecmp_l_evex ++#endif ++#define USE_AS_STRCASECMP_L ++#include "strcmp-evex.S" +diff --git a/sysdeps/x86_64/multiarch/strcmp-evex.S b/sysdeps/x86_64/multiarch/strcmp-evex.S +index 0dfa62bd149c02b4..b81b57753c38db1f 100644 +--- a/sysdeps/x86_64/multiarch/strcmp-evex.S ++++ b/sysdeps/x86_64/multiarch/strcmp-evex.S +@@ -19,6 +19,9 @@ + #if IS_IN (libc) + + # include ++# if defined USE_AS_STRCASECMP_L ++# include "locale-defines.h" ++# endif + + # ifndef STRCMP + # define STRCMP __strcmp_evex +@@ -34,19 +37,29 @@ + # define VMOVA vmovdqa64 + + # ifdef USE_AS_WCSCMP +-# define TESTEQ subl $0xff, ++# ifndef OVERFLOW_STRCMP ++# define OVERFLOW_STRCMP __wcscmp_evex ++# endif ++ ++# define TESTEQ subl $0xff, + /* Compare packed dwords. */ + # define VPCMP vpcmpd + # define VPMINU vpminud + # define VPTESTM vptestmd ++# define VPTESTNM vptestnmd + /* 1 dword char == 4 bytes. */ + # define SIZE_OF_CHAR 4 + # else ++# ifndef OVERFLOW_STRCMP ++# define OVERFLOW_STRCMP __strcmp_evex ++# endif ++ + # define TESTEQ incl + /* Compare packed bytes. */ + # define VPCMP vpcmpb + # define VPMINU vpminub + # define VPTESTM vptestmb ++# define VPTESTNM vptestnmb + /* 1 byte char == 1 byte. */ + # define SIZE_OF_CHAR 1 + # endif +@@ -73,11 +86,16 @@ + # define VEC_OFFSET (-VEC_SIZE) + # endif + +-# define XMMZERO xmm16 + # define XMM0 xmm17 + # define XMM1 xmm18 + +-# define YMMZERO ymm16 ++# define XMM10 xmm27 ++# define XMM11 xmm28 ++# define XMM12 xmm29 ++# define XMM13 xmm30 ++# define XMM14 xmm31 ++ ++ + # define YMM0 ymm17 + # define YMM1 ymm18 + # define YMM2 ymm19 +@@ -89,6 +107,87 @@ + # define YMM8 ymm25 + # define YMM9 ymm26 + # define YMM10 ymm27 ++# define YMM11 ymm28 ++# define YMM12 ymm29 ++# define YMM13 ymm30 ++# define YMM14 ymm31 ++ ++# ifdef USE_AS_STRCASECMP_L ++# define BYTE_LOOP_REG OFFSET_REG ++# else ++# define BYTE_LOOP_REG ecx ++# endif ++ ++# ifdef USE_AS_STRCASECMP_L ++# ifdef USE_AS_STRNCMP ++# define STRCASECMP __strncasecmp_evex ++# define LOCALE_REG rcx ++# define LOCALE_REG_LP RCX_LP ++# define STRCASECMP_NONASCII __strncasecmp_l_nonascii ++# else ++# define STRCASECMP __strcasecmp_evex ++# define LOCALE_REG rdx ++# define LOCALE_REG_LP RDX_LP ++# define STRCASECMP_NONASCII __strcasecmp_l_nonascii ++# endif ++# endif ++ ++# define LCASE_MIN_YMM %YMM12 ++# define LCASE_MAX_YMM %YMM13 ++# define CASE_ADD_YMM %YMM14 ++ ++# define LCASE_MIN_XMM %XMM12 ++# define LCASE_MAX_XMM %XMM13 ++# define CASE_ADD_XMM %XMM14 ++ ++ /* NB: wcsncmp uses r11 but strcasecmp is never used in ++ conjunction with wcscmp. */ ++# define TOLOWER_BASE %r11 ++ ++# ifdef USE_AS_STRCASECMP_L ++# define _REG(x, y) x ## y ++# define REG(x, y) _REG(x, y) ++# define TOLOWER(reg1, reg2, ext) \ ++ vpsubb REG(LCASE_MIN_, ext), reg1, REG(%ext, 10); \ ++ vpsubb REG(LCASE_MIN_, ext), reg2, REG(%ext, 11); \ ++ vpcmpub $1, REG(LCASE_MAX_, ext), REG(%ext, 10), %k5; \ ++ vpcmpub $1, REG(LCASE_MAX_, ext), REG(%ext, 11), %k6; \ ++ vpaddb reg1, REG(CASE_ADD_, ext), reg1{%k5}; \ ++ vpaddb reg2, REG(CASE_ADD_, ext), reg2{%k6} ++ ++# define TOLOWER_gpr(src, dst) movl (TOLOWER_BASE, src, 4), dst ++# define TOLOWER_YMM(...) TOLOWER(__VA_ARGS__, YMM) ++# define TOLOWER_XMM(...) 
TOLOWER(__VA_ARGS__, XMM) ++ ++# define CMP_R1_R2(s1_reg, s2_reg, reg_out, ext) \ ++ TOLOWER (s1_reg, s2_reg, ext); \ ++ VPCMP $0, s1_reg, s2_reg, reg_out ++ ++# define CMP_R1_S2(s1_reg, s2_mem, s2_reg, reg_out, ext) \ ++ VMOVU s2_mem, s2_reg; \ ++ CMP_R1_R2(s1_reg, s2_reg, reg_out, ext) ++ ++# define CMP_R1_R2_YMM(...) CMP_R1_R2(__VA_ARGS__, YMM) ++# define CMP_R1_R2_XMM(...) CMP_R1_R2(__VA_ARGS__, XMM) ++ ++# define CMP_R1_S2_YMM(...) CMP_R1_S2(__VA_ARGS__, YMM) ++# define CMP_R1_S2_XMM(...) CMP_R1_S2(__VA_ARGS__, XMM) ++ ++# else ++# define TOLOWER_gpr(...) ++# define TOLOWER_YMM(...) ++# define TOLOWER_XMM(...) ++ ++# define CMP_R1_R2_YMM(s1_reg, s2_reg, reg_out) \ ++ VPCMP $0, s2_reg, s1_reg, reg_out ++ ++# define CMP_R1_R2_XMM(...) CMP_R1_R2_YMM(__VA_ARGS__) ++ ++# define CMP_R1_S2_YMM(s1_reg, s2_mem, unused, reg_out) \ ++ VPCMP $0, s2_mem, s1_reg, reg_out ++ ++# define CMP_R1_S2_XMM(...) CMP_R1_S2_YMM(__VA_ARGS__) ++# endif + + /* Warning! + wcscmp/wcsncmp have to use SIGNED comparison for elements. +@@ -112,8 +211,45 @@ + returned. */ + + .section .text.evex, "ax", @progbits +-ENTRY(STRCMP) ++ .align 16 ++ .type STRCMP, @function ++ .globl STRCMP ++ .hidden STRCMP ++ ++# ifdef USE_AS_STRCASECMP_L ++ENTRY (STRCASECMP) ++ movq __libc_tsd_LOCALE@gottpoff(%rip), %rax ++ mov %fs:(%rax), %LOCALE_REG_LP ++ ++ /* Either 1 or 5 bytes (dependeing if CET is enabled). */ ++ .p2align 4 ++END (STRCASECMP) ++ /* FALLTHROUGH to strcasecmp/strncasecmp_l. */ ++# endif ++ ++ .p2align 4 ++STRCMP: ++ cfi_startproc ++ _CET_ENDBR ++ CALL_MCOUNT ++ ++# if defined USE_AS_STRCASECMP_L ++ /* We have to fall back on the C implementation for locales with ++ encodings not matching ASCII for single bytes. */ ++# if LOCALE_T___LOCALES != 0 || LC_CTYPE != 0 ++ mov LOCALE_T___LOCALES + LC_CTYPE * LP_SIZE(%LOCALE_REG), %RAX_LP ++# else ++ mov (%LOCALE_REG), %RAX_LP ++# endif ++ testl $1, LOCALE_DATA_VALUES + _NL_CTYPE_NONASCII_CASE * SIZEOF_VALUES(%rax) ++ jne STRCASECMP_NONASCII ++ leaq _nl_C_LC_CTYPE_tolower + 128 * 4(%rip), TOLOWER_BASE ++# endif ++ + # ifdef USE_AS_STRNCMP ++ /* Don't overwrite LOCALE_REG (rcx) until we have pass ++ L(one_or_less). Otherwise we might use the wrong locale in ++ the OVERFLOW_STRCMP (strcasecmp_l). */ + # ifdef __ILP32__ + /* Clear the upper 32 bits. */ + movl %edx, %edx +@@ -125,6 +261,32 @@ ENTRY(STRCMP) + actually bound the buffer. */ + jle L(one_or_less) + # endif ++ ++# if defined USE_AS_STRCASECMP_L ++ .section .rodata.cst32, "aM", @progbits, 32 ++ .align 32 ++L(lcase_min): ++ .quad 0x4141414141414141 ++ .quad 0x4141414141414141 ++ .quad 0x4141414141414141 ++ .quad 0x4141414141414141 ++L(lcase_max): ++ .quad 0x1a1a1a1a1a1a1a1a ++ .quad 0x1a1a1a1a1a1a1a1a ++ .quad 0x1a1a1a1a1a1a1a1a ++ .quad 0x1a1a1a1a1a1a1a1a ++L(case_add): ++ .quad 0x2020202020202020 ++ .quad 0x2020202020202020 ++ .quad 0x2020202020202020 ++ .quad 0x2020202020202020 ++ .previous ++ ++ vmovdqa64 L(lcase_min)(%rip), LCASE_MIN_YMM ++ vmovdqa64 L(lcase_max)(%rip), LCASE_MAX_YMM ++ vmovdqa64 L(case_add)(%rip), CASE_ADD_YMM ++# endif ++ + movl %edi, %eax + orl %esi, %eax + /* Shift out the bits irrelivant to page boundary ([63:12]). */ +@@ -139,7 +301,7 @@ L(no_page_cross): + VPTESTM %YMM0, %YMM0, %k2 + /* Each bit cleared in K1 represents a mismatch or a null CHAR + in YMM0 and 32 bytes at (%rsi). 
*/ +- VPCMP $0, (%rsi), %YMM0, %k1{%k2} ++ CMP_R1_S2_YMM (%YMM0, (%rsi), %YMM1, %k1){%k2} + kmovd %k1, %ecx + # ifdef USE_AS_STRNCMP + cmpq $CHAR_PER_VEC, %rdx +@@ -169,6 +331,8 @@ L(return_vec_0): + # else + movzbl (%rdi, %rcx), %eax + movzbl (%rsi, %rcx), %ecx ++ TOLOWER_gpr (%rax, %eax) ++ TOLOWER_gpr (%rcx, %ecx) + subl %ecx, %eax + # endif + L(ret0): +@@ -188,11 +352,15 @@ L(ret_zero): + + .p2align 4,, 5 + L(one_or_less): ++# ifdef USE_AS_STRCASECMP_L ++ /* Set locale argument for strcasecmp. */ ++ movq %LOCALE_REG, %rdx ++# endif + jb L(ret_zero) +-# ifdef USE_AS_WCSCMP + /* 'nbe' covers the case where length is negative (large + unsigned). */ +- jnbe __wcscmp_evex ++ jnbe OVERFLOW_STRCMP ++# ifdef USE_AS_WCSCMP + movl (%rdi), %edx + xorl %eax, %eax + cmpl (%rsi), %edx +@@ -201,11 +369,10 @@ L(one_or_less): + negl %eax + orl $1, %eax + # else +- /* 'nbe' covers the case where length is negative (large +- unsigned). */ +- jnbe __strcmp_evex + movzbl (%rdi), %eax + movzbl (%rsi), %ecx ++ TOLOWER_gpr (%rax, %eax) ++ TOLOWER_gpr (%rcx, %ecx) + subl %ecx, %eax + # endif + L(ret1): +@@ -233,6 +400,8 @@ L(return_vec_1): + # else + movzbl VEC_SIZE(%rdi, %rcx), %eax + movzbl VEC_SIZE(%rsi, %rcx), %ecx ++ TOLOWER_gpr (%rax, %eax) ++ TOLOWER_gpr (%rcx, %ecx) + subl %ecx, %eax + # endif + L(ret2): +@@ -270,6 +439,8 @@ L(return_vec_2): + # else + movzbl (VEC_SIZE * 2)(%rdi, %rcx), %eax + movzbl (VEC_SIZE * 2)(%rsi, %rcx), %ecx ++ TOLOWER_gpr (%rax, %eax) ++ TOLOWER_gpr (%rcx, %ecx) + subl %ecx, %eax + # endif + L(ret3): +@@ -290,6 +461,8 @@ L(return_vec_3): + # else + movzbl (VEC_SIZE * 3)(%rdi, %rcx), %eax + movzbl (VEC_SIZE * 3)(%rsi, %rcx), %ecx ++ TOLOWER_gpr (%rax, %eax) ++ TOLOWER_gpr (%rcx, %ecx) + subl %ecx, %eax + # endif + L(ret4): +@@ -303,7 +476,7 @@ L(more_3x_vec): + /* Safe to compare 4x vectors. */ + VMOVU (VEC_SIZE)(%rdi), %YMM0 + VPTESTM %YMM0, %YMM0, %k2 +- VPCMP $0, (VEC_SIZE)(%rsi), %YMM0, %k1{%k2} ++ CMP_R1_S2_YMM (%YMM0, VEC_SIZE(%rsi), %YMM1, %k1){%k2} + kmovd %k1, %ecx + TESTEQ %ecx + jnz L(return_vec_1) +@@ -315,14 +488,14 @@ L(more_3x_vec): + + VMOVU (VEC_SIZE * 2)(%rdi), %YMM0 + VPTESTM %YMM0, %YMM0, %k2 +- VPCMP $0, (VEC_SIZE * 2)(%rsi), %YMM0, %k1{%k2} ++ CMP_R1_S2_YMM (%YMM0, (VEC_SIZE * 2)(%rsi), %YMM1, %k1){%k2} + kmovd %k1, %ecx + TESTEQ %ecx + jnz L(return_vec_2) + + VMOVU (VEC_SIZE * 3)(%rdi), %YMM0 + VPTESTM %YMM0, %YMM0, %k2 +- VPCMP $0, (VEC_SIZE * 3)(%rsi), %YMM0, %k1{%k2} ++ CMP_R1_S2_YMM (%YMM0, (VEC_SIZE * 3)(%rsi), %YMM1, %k1){%k2} + kmovd %k1, %ecx + TESTEQ %ecx + jnz L(return_vec_3) +@@ -381,7 +554,6 @@ L(prepare_loop_aligned): + subl %esi, %eax + andl $(PAGE_SIZE - 1), %eax + +- vpxorq %YMMZERO, %YMMZERO, %YMMZERO + + /* Loop 4x comparisons at a time. */ + .p2align 4 +@@ -413,22 +585,35 @@ L(loop_skip_page_cross_check): + /* A zero CHAR in YMM9 means that there is a null CHAR. */ + VPMINU %YMM8, %YMM9, %YMM9 + +- /* Each bit set in K1 represents a non-null CHAR in YMM8. */ ++ /* Each bit set in K1 represents a non-null CHAR in YMM9. */ + VPTESTM %YMM9, %YMM9, %k1 +- ++# ifndef USE_AS_STRCASECMP_L + vpxorq (VEC_SIZE * 0)(%rsi), %YMM0, %YMM1 + vpxorq (VEC_SIZE * 1)(%rsi), %YMM2, %YMM3 + vpxorq (VEC_SIZE * 2)(%rsi), %YMM4, %YMM5 + /* Ternary logic to xor (VEC_SIZE * 3)(%rsi) with YMM6 while + oring with YMM1. Result is stored in YMM6. 
*/ + vpternlogd $0xde, (VEC_SIZE * 3)(%rsi), %YMM1, %YMM6 +- ++# else ++ VMOVU (VEC_SIZE * 0)(%rsi), %YMM1 ++ TOLOWER_YMM (%YMM0, %YMM1) ++ VMOVU (VEC_SIZE * 1)(%rsi), %YMM3 ++ TOLOWER_YMM (%YMM2, %YMM3) ++ VMOVU (VEC_SIZE * 2)(%rsi), %YMM5 ++ TOLOWER_YMM (%YMM4, %YMM5) ++ VMOVU (VEC_SIZE * 3)(%rsi), %YMM7 ++ TOLOWER_YMM (%YMM6, %YMM7) ++ vpxorq %YMM0, %YMM1, %YMM1 ++ vpxorq %YMM2, %YMM3, %YMM3 ++ vpxorq %YMM4, %YMM5, %YMM5 ++ vpternlogd $0xde, %YMM7, %YMM1, %YMM6 ++# endif + /* Or together YMM3, YMM5, and YMM6. */ + vpternlogd $0xfe, %YMM3, %YMM5, %YMM6 + + + /* A non-zero CHAR in YMM6 represents a mismatch. */ +- VPCMP $0, %YMMZERO, %YMM6, %k0{%k1} ++ VPTESTNM %YMM6, %YMM6, %k0{%k1} + kmovd %k0, %LOOP_REG + + TESTEQ %LOOP_REG +@@ -437,13 +622,13 @@ L(loop_skip_page_cross_check): + + /* Find which VEC has the mismatch of end of string. */ + VPTESTM %YMM0, %YMM0, %k1 +- VPCMP $0, %YMMZERO, %YMM1, %k0{%k1} ++ VPTESTNM %YMM1, %YMM1, %k0{%k1} + kmovd %k0, %ecx + TESTEQ %ecx + jnz L(return_vec_0_end) + + VPTESTM %YMM2, %YMM2, %k1 +- VPCMP $0, %YMMZERO, %YMM3, %k0{%k1} ++ VPTESTNM %YMM3, %YMM3, %k0{%k1} + kmovd %k0, %ecx + TESTEQ %ecx + jnz L(return_vec_1_end) +@@ -457,7 +642,7 @@ L(return_vec_2_3_end): + # endif + + VPTESTM %YMM4, %YMM4, %k1 +- VPCMP $0, %YMMZERO, %YMM5, %k0{%k1} ++ VPTESTNM %YMM5, %YMM5, %k0{%k1} + kmovd %k0, %ecx + TESTEQ %ecx + # if CHAR_PER_VEC <= 16 +@@ -493,6 +678,8 @@ L(return_vec_3_end): + # else + movzbl (VEC_SIZE * 2)(%rdi, %LOOP_REG64), %eax + movzbl (VEC_SIZE * 2)(%rsi, %LOOP_REG64), %ecx ++ TOLOWER_gpr (%rax, %eax) ++ TOLOWER_gpr (%rcx, %ecx) + subl %ecx, %eax + xorl %r8d, %eax + subl %r8d, %eax +@@ -545,6 +732,8 @@ L(return_vec_0_end): + # else + movzbl (%rdi, %rcx), %eax + movzbl (%rsi, %rcx), %ecx ++ TOLOWER_gpr (%rax, %eax) ++ TOLOWER_gpr (%rcx, %ecx) + subl %ecx, %eax + /* Flip `eax` if `rdi` and `rsi` where swapped in page cross + logic. Subtract `r8d` after xor for zero case. */ +@@ -569,6 +758,8 @@ L(return_vec_1_end): + # else + movzbl VEC_SIZE(%rdi, %rcx), %eax + movzbl VEC_SIZE(%rsi, %rcx), %ecx ++ TOLOWER_gpr (%rax, %eax) ++ TOLOWER_gpr (%rcx, %ecx) + subl %ecx, %eax + xorl %r8d, %eax + subl %r8d, %eax +@@ -598,7 +789,7 @@ L(page_cross_during_loop): + + VMOVA (%rdi), %YMM0 + VPTESTM %YMM0, %YMM0, %k2 +- VPCMP $0, (%rsi), %YMM0, %k1{%k2} ++ CMP_R1_S2_YMM (%YMM0, (%rsi), %YMM1, %k1){%k2} + kmovd %k1, %ecx + TESTEQ %ecx + jnz L(return_vec_0_end) +@@ -619,8 +810,7 @@ L(less_1x_vec_till_page_cross): + been loaded earlier so must be valid. */ + VMOVU -VEC_SIZE(%rdi, %rax), %YMM0 + VPTESTM %YMM0, %YMM0, %k2 +- VPCMP $0, -VEC_SIZE(%rsi, %rax), %YMM0, %k1{%k2} +- ++ CMP_R1_S2_YMM (%YMM0, -VEC_SIZE(%rsi, %rax), %YMM1, %k1){%k2} + /* Mask of potentially valid bits. The lower bits can be out of + range comparisons (but safe regarding page crosses). */ + +@@ -642,6 +832,8 @@ L(less_1x_vec_till_page_cross): + + # ifdef USE_AS_STRNCMP + # ifdef USE_AS_WCSCMP ++ /* NB: strcasecmp not used with WCSCMP so this access to r11 is ++ safe. 
*/ + movl %eax, %r11d + shrl $2, %r11d + cmpq %r11, %rdx +@@ -679,6 +871,8 @@ L(return_page_cross_cmp_mem): + # else + movzbl VEC_OFFSET(%rdi, %rcx), %eax + movzbl VEC_OFFSET(%rsi, %rcx), %ecx ++ TOLOWER_gpr (%rax, %eax) ++ TOLOWER_gpr (%rcx, %ecx) + subl %ecx, %eax + xorl %r8d, %eax + subl %r8d, %eax +@@ -709,7 +903,7 @@ L(more_2x_vec_till_page_cross): + + VMOVA VEC_SIZE(%rdi), %YMM0 + VPTESTM %YMM0, %YMM0, %k2 +- VPCMP $0, VEC_SIZE(%rsi), %YMM0, %k1{%k2} ++ CMP_R1_S2_YMM (%YMM0, VEC_SIZE(%rsi), %YMM1, %k1){%k2} + kmovd %k1, %ecx + TESTEQ %ecx + jnz L(return_vec_1_end) +@@ -724,14 +918,14 @@ L(more_2x_vec_till_page_cross): + /* Safe to include comparisons from lower bytes. */ + VMOVU -(VEC_SIZE * 2)(%rdi, %rax), %YMM0 + VPTESTM %YMM0, %YMM0, %k2 +- VPCMP $0, -(VEC_SIZE * 2)(%rsi, %rax), %YMM0, %k1{%k2} ++ CMP_R1_S2_YMM (%YMM0, -(VEC_SIZE * 2)(%rsi, %rax), %YMM1, %k1){%k2} + kmovd %k1, %ecx + TESTEQ %ecx + jnz L(return_vec_page_cross_0) + + VMOVU -(VEC_SIZE * 1)(%rdi, %rax), %YMM0 + VPTESTM %YMM0, %YMM0, %k2 +- VPCMP $0, -(VEC_SIZE * 1)(%rsi, %rax), %YMM0, %k1{%k2} ++ CMP_R1_S2_YMM (%YMM0, -(VEC_SIZE * 1)(%rsi, %rax), %YMM1, %k1){%k2} + kmovd %k1, %ecx + TESTEQ %ecx + jnz L(return_vec_page_cross_1) +@@ -740,6 +934,8 @@ L(more_2x_vec_till_page_cross): + /* Must check length here as length might proclude reading next + page. */ + # ifdef USE_AS_WCSCMP ++ /* NB: strcasecmp not used with WCSCMP so this access to r11 is ++ safe. */ + movl %eax, %r11d + shrl $2, %r11d + cmpq %r11, %rdx +@@ -754,12 +950,19 @@ L(more_2x_vec_till_page_cross): + VMOVA (VEC_SIZE * 3)(%rdi), %YMM6 + VPMINU %YMM4, %YMM6, %YMM9 + VPTESTM %YMM9, %YMM9, %k1 +- ++# ifndef USE_AS_STRCASECMP_L + vpxorq (VEC_SIZE * 2)(%rsi), %YMM4, %YMM5 + /* YMM6 = YMM5 | ((VEC_SIZE * 3)(%rsi) ^ YMM6). */ + vpternlogd $0xde, (VEC_SIZE * 3)(%rsi), %YMM5, %YMM6 +- +- VPCMP $0, %YMMZERO, %YMM6, %k0{%k1} ++# else ++ VMOVU (VEC_SIZE * 2)(%rsi), %YMM5 ++ TOLOWER_YMM (%YMM4, %YMM5) ++ VMOVU (VEC_SIZE * 3)(%rsi), %YMM7 ++ TOLOWER_YMM (%YMM6, %YMM7) ++ vpxorq %YMM4, %YMM5, %YMM5 ++ vpternlogd $0xde, %YMM7, %YMM5, %YMM6 ++# endif ++ VPTESTNM %YMM6, %YMM6, %k0{%k1} + kmovd %k0, %LOOP_REG + TESTEQ %LOOP_REG + jnz L(return_vec_2_3_end) +@@ -815,6 +1018,8 @@ L(return_vec_page_cross_1): + # else + movzbl VEC_OFFSET(%rdi, %rcx), %eax + movzbl VEC_OFFSET(%rsi, %rcx), %ecx ++ TOLOWER_gpr (%rax, %eax) ++ TOLOWER_gpr (%rcx, %ecx) + subl %ecx, %eax + xorl %r8d, %eax + subl %r8d, %eax +@@ -871,7 +1076,7 @@ L(page_cross): + L(page_cross_loop): + VMOVU (%rdi, %OFFSET_REG64, SIZE_OF_CHAR), %YMM0 + VPTESTM %YMM0, %YMM0, %k2 +- VPCMP $0, (%rsi, %OFFSET_REG64, SIZE_OF_CHAR), %YMM0, %k1{%k2} ++ CMP_R1_S2_YMM (%YMM0, (%rsi, %OFFSET_REG64, SIZE_OF_CHAR), %YMM1, %k1){%k2} + kmovd %k1, %ecx + TESTEQ %ecx + jnz L(check_ret_vec_page_cross) +@@ -895,7 +1100,7 @@ L(page_cross_loop): + */ + VMOVU (%rdi, %OFFSET_REG64, SIZE_OF_CHAR), %YMM0 + VPTESTM %YMM0, %YMM0, %k2 +- VPCMP $0, (%rsi, %OFFSET_REG64, SIZE_OF_CHAR), %YMM0, %k1{%k2} ++ CMP_R1_S2_YMM (%YMM0, (%rsi, %OFFSET_REG64, SIZE_OF_CHAR), %YMM1, %k1){%k2} + + kmovd %k1, %ecx + # ifdef USE_AS_STRNCMP +@@ -930,6 +1135,8 @@ L(ret_vec_page_cross_cont): + # else + movzbl (%rdi, %rcx, SIZE_OF_CHAR), %eax + movzbl (%rsi, %rcx, SIZE_OF_CHAR), %ecx ++ TOLOWER_gpr (%rax, %eax) ++ TOLOWER_gpr (%rcx, %ecx) + subl %ecx, %eax + xorl %r8d, %eax + subl %r8d, %eax +@@ -989,7 +1196,7 @@ L(less_1x_vec_till_page): + /* Use 16 byte comparison. 
*/ + vmovdqu (%rdi), %xmm0 + VPTESTM %xmm0, %xmm0, %k2 +- VPCMP $0, (%rsi), %xmm0, %k1{%k2} ++ CMP_R1_S2_XMM (%xmm0, (%rsi), %xmm1, %k1){%k2} + kmovd %k1, %ecx + # ifdef USE_AS_WCSCMP + subl $0xf, %ecx +@@ -1009,7 +1216,7 @@ L(less_1x_vec_till_page): + # endif + vmovdqu (%rdi, %OFFSET_REG64, SIZE_OF_CHAR), %xmm0 + VPTESTM %xmm0, %xmm0, %k2 +- VPCMP $0, (%rsi, %OFFSET_REG64, SIZE_OF_CHAR), %xmm0, %k1{%k2} ++ CMP_R1_S2_XMM (%xmm0, (%rsi, %OFFSET_REG64, SIZE_OF_CHAR), %xmm1, %k1){%k2} + kmovd %k1, %ecx + # ifdef USE_AS_WCSCMP + subl $0xf, %ecx +@@ -1048,7 +1255,7 @@ L(less_16_till_page): + vmovq (%rdi), %xmm0 + vmovq (%rsi), %xmm1 + VPTESTM %xmm0, %xmm0, %k2 +- VPCMP $0, %xmm1, %xmm0, %k1{%k2} ++ CMP_R1_R2_XMM (%xmm0, %xmm1, %k1){%k2} + kmovd %k1, %ecx + # ifdef USE_AS_WCSCMP + subl $0x3, %ecx +@@ -1068,7 +1275,7 @@ L(less_16_till_page): + vmovq (%rdi, %OFFSET_REG64, SIZE_OF_CHAR), %xmm0 + vmovq (%rsi, %OFFSET_REG64, SIZE_OF_CHAR), %xmm1 + VPTESTM %xmm0, %xmm0, %k2 +- VPCMP $0, %xmm1, %xmm0, %k1{%k2} ++ CMP_R1_R2_XMM (%xmm0, %xmm1, %k1){%k2} + kmovd %k1, %ecx + # ifdef USE_AS_WCSCMP + subl $0x3, %ecx +@@ -1128,7 +1335,7 @@ L(ret_less_8_wcs): + vmovd (%rdi), %xmm0 + vmovd (%rsi), %xmm1 + VPTESTM %xmm0, %xmm0, %k2 +- VPCMP $0, %xmm1, %xmm0, %k1{%k2} ++ CMP_R1_R2_XMM (%xmm0, %xmm1, %k1){%k2} + kmovd %k1, %ecx + subl $0xf, %ecx + jnz L(check_ret_vec_page_cross) +@@ -1143,7 +1350,7 @@ L(ret_less_8_wcs): + vmovd (%rdi, %OFFSET_REG64, SIZE_OF_CHAR), %xmm0 + vmovd (%rsi, %OFFSET_REG64, SIZE_OF_CHAR), %xmm1 + VPTESTM %xmm0, %xmm0, %k2 +- VPCMP $0, %xmm1, %xmm0, %k1{%k2} ++ CMP_R1_R2_XMM (%xmm0, %xmm1, %k1){%k2} + kmovd %k1, %ecx + subl $0xf, %ecx + jnz L(check_ret_vec_page_cross) +@@ -1176,7 +1383,9 @@ L(less_4_till_page): + L(less_4_loop): + movzbl (%rdi), %eax + movzbl (%rsi, %rdi), %ecx +- subl %ecx, %eax ++ TOLOWER_gpr (%rax, %eax) ++ TOLOWER_gpr (%rcx, %BYTE_LOOP_REG) ++ subl %BYTE_LOOP_REG, %eax + jnz L(ret_less_4_loop) + testl %ecx, %ecx + jz L(ret_zero_4_loop) +@@ -1203,5 +1412,6 @@ L(ret_less_4_loop): + subl %r8d, %eax + ret + # endif +-END(STRCMP) ++ cfi_endproc ++ .size STRCMP, .-STRCMP + #endif +diff --git a/sysdeps/x86_64/multiarch/strncase_l-evex.S b/sysdeps/x86_64/multiarch/strncase_l-evex.S +new file mode 100644 +index 0000000000000000..8a5af3695cb8cfff +--- /dev/null ++++ b/sysdeps/x86_64/multiarch/strncase_l-evex.S +@@ -0,0 +1,25 @@ ++/* strncasecmp_l optimized with EVEX. ++ Copyright (C) 2017-2022 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . 
*/
++
++#ifndef STRCMP
++# define STRCMP __strncasecmp_l_evex
++#endif
++#define OVERFLOW_STRCMP __strcasecmp_l_evex
++#define USE_AS_STRCASECMP_L
++#define USE_AS_STRNCMP
++#include "strcmp-evex.S"
diff --git a/SOURCES/glibc-upstream-2.34-229.patch b/SOURCES/glibc-upstream-2.34-229.patch
new file mode 100644
index 0000000..97f6bbd
--- /dev/null
+++ b/SOURCES/glibc-upstream-2.34-229.patch
@@ -0,0 +1,902 @@
+commit 80883f43545f4f9afcb26beef9358dfdcd021bd6
+Author: Noah Goldstein
+Date: Wed Mar 23 16:57:46 2022 -0500
+
+ x86: Remove AVX str{n}casecmp
+
+ The rationale is:
+
+ 1. SSE42 has nearly identical logic so any benefit is minimal (3.4%
+ regression on Tigerlake using SSE42 versus AVX across the
+ benchtest suite).
+ 2. AVX2 version covers the majority of targets that previously
+ preferred it.
+ 3. The targets where AVX would still be best (SnB and IVB) are
+ becoming outdated.
+
+ All in all, the code size saving is worth it.
+
+ All string/memory tests pass.
+ Reviewed-by: H.J. Lu
+
+ (cherry picked from commit 305769b2a15c2e96f9e1b5195d3c4e0d6f0f4b68)
+
+diff --git a/sysdeps/x86_64/multiarch/Makefile b/sysdeps/x86_64/multiarch/Makefile
+index 359712c1491a2431..bca82e38d86cc440 100644
+--- a/sysdeps/x86_64/multiarch/Makefile
++++ b/sysdeps/x86_64/multiarch/Makefile
+@@ -50,7 +50,6 @@ sysdep_routines += \
+ stpncpy-evex \
+ stpncpy-sse2-unaligned \
+ stpncpy-ssse3 \
+- strcasecmp_l-avx \
+ strcasecmp_l-avx2 \
+ strcasecmp_l-avx2-rtm \
+ strcasecmp_l-evex \
+@@ -91,7 +90,6 @@ sysdep_routines += \
+ strlen-avx2-rtm \
+ strlen-evex \
+ strlen-sse2 \
+- strncase_l-avx \
+ strncase_l-avx2 \
+ strncase_l-avx2-rtm \
+ strncase_l-evex \
+diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
+index f6994e5406933d53..4c7834dd0b951fa4 100644
+--- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c
++++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
+@@ -429,9 +429,6 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
+ (CPU_FEATURE_USABLE (AVX2)
+ && CPU_FEATURE_USABLE (RTM)),
+ __strcasecmp_avx2_rtm)
+- IFUNC_IMPL_ADD (array, i, strcasecmp,
+- CPU_FEATURE_USABLE (AVX),
+- __strcasecmp_avx)
+ IFUNC_IMPL_ADD (array, i, strcasecmp,
+ CPU_FEATURE_USABLE (SSE4_2),
+ __strcasecmp_sse42)
+@@ -453,9 +450,6 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
+ (CPU_FEATURE_USABLE (AVX2)
+ && CPU_FEATURE_USABLE (RTM)),
+ __strcasecmp_l_avx2_rtm)
+- IFUNC_IMPL_ADD (array, i, strcasecmp_l,
+- CPU_FEATURE_USABLE (AVX),
+- __strcasecmp_l_avx)
+ IFUNC_IMPL_ADD (array, i, strcasecmp_l,
+ CPU_FEATURE_USABLE (SSE4_2),
+ __strcasecmp_l_sse42)
+@@ -591,9 +585,6 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
+ (CPU_FEATURE_USABLE (AVX2)
+ && CPU_FEATURE_USABLE (RTM)),
+ __strncasecmp_avx2_rtm)
+- IFUNC_IMPL_ADD (array, i, strncasecmp,
+- CPU_FEATURE_USABLE (AVX),
+- __strncasecmp_avx)
+ IFUNC_IMPL_ADD (array, i, strncasecmp,
+ CPU_FEATURE_USABLE (SSE4_2),
+ __strncasecmp_sse42)
+@@ -616,9 +607,6 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
+ (CPU_FEATURE_USABLE (AVX2)
+ && CPU_FEATURE_USABLE (RTM)),
+ __strncasecmp_l_avx2_rtm)
+- IFUNC_IMPL_ADD (array, i, strncasecmp_l,
+- CPU_FEATURE_USABLE (AVX),
+- __strncasecmp_l_avx)
+ IFUNC_IMPL_ADD (array, i, strncasecmp_l,
+ CPU_FEATURE_USABLE (SSE4_2),
+ __strncasecmp_l_sse42)
+diff --git a/sysdeps/x86_64/multiarch/ifunc-strcasecmp.h b/sysdeps/x86_64/multiarch/ifunc-strcasecmp.h
+index 488e99e4997f379b..40819caf5ab10337 100644
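All of the case-insensitive kernels kept by this series (the EVEX strcmp above and the SSE42 strcmp below) lower-case bytes with the same branchless ASCII range trick rather than a per-character table lookup: bias each byte so that 'A'..'Z' becomes the smallest signed values, do one signed compare, and add 0x20 only where the compare succeeds. A scalar C model of that trick, as one reading of the TOLOWER/TOLOWER_YMM macros in these patches (names and exact constants here are illustrative, not glibc's):

#include <assert.h>
#include <ctype.h>

/* Scalar model of the SIMD TOLOWER step: one add, one signed compare
   and one masked add per byte, with no branch per character.
   0x3f == 0x80 - 'A', so 'A'..'Z' biases to 0x80..0x99, the 26
   smallest values once reinterpreted as signed char.  */
static unsigned char
tolower_branchless (unsigned char c)
{
  signed char biased = (signed char) (c + 0x3f);
  /* (signed char) 0x99 == -103; true exactly for c in ['A', 'Z'].  */
  unsigned int is_upper = biased <= (signed char) 0x99;
  return (unsigned char) (c + (is_upper << 5)); /* +0x20 iff uppercase.  */
}

int
main (void)
{
  /* In the default C locale this must agree with ctype's tolower.  */
  for (unsigned int c = 0; c < 256; c++)
    assert (tolower_branchless ((unsigned char) c)
            == (unsigned char) tolower ((int) c));
  return 0;
}

The vector versions apply the same three steps lanewise (paddb, pcmpgtb, pandn plus paddb), which is why the case-insensitive variants cost only a few extra instructions per 16- or 32-byte block.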
+--- a/sysdeps/x86_64/multiarch/ifunc-strcasecmp.h
++++ b/sysdeps/x86_64/multiarch/ifunc-strcasecmp.h
+@@ -22,7 +22,6 @@
+ extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
+ extern __typeof (REDIRECT_NAME) OPTIMIZE (ssse3) attribute_hidden;
+ extern __typeof (REDIRECT_NAME) OPTIMIZE (sse42) attribute_hidden;
+-extern __typeof (REDIRECT_NAME) OPTIMIZE (avx) attribute_hidden;
+ extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden;
+ extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_rtm) attribute_hidden;
+ extern __typeof (REDIRECT_NAME) OPTIMIZE (evex) attribute_hidden;
+@@ -46,9 +45,6 @@ IFUNC_SELECTOR (void)
+ return OPTIMIZE (avx2);
+ }
+
+- if (CPU_FEATURE_USABLE_P (cpu_features, AVX))
+- return OPTIMIZE (avx);
+-
+ if (CPU_FEATURE_USABLE_P (cpu_features, SSE4_2)
+ && !CPU_FEATURES_ARCH_P (cpu_features, Slow_SSE4_2))
+ return OPTIMIZE (sse42);
+diff --git a/sysdeps/x86_64/multiarch/strcasecmp_l-avx.S b/sysdeps/x86_64/multiarch/strcasecmp_l-avx.S
+deleted file mode 100644
+index 647aa05714d7a36c..0000000000000000
+--- a/sysdeps/x86_64/multiarch/strcasecmp_l-avx.S
++++ /dev/null
+@@ -1,22 +0,0 @@
+-/* strcasecmp_l optimized with AVX.
+- Copyright (C) 2017-2021 Free Software Foundation, Inc.
+- This file is part of the GNU C Library.
+-
+- The GNU C Library is free software; you can redistribute it and/or
+- modify it under the terms of the GNU Lesser General Public
+- License as published by the Free Software Foundation; either
+- version 2.1 of the License, or (at your option) any later version.
+-
+- The GNU C Library is distributed in the hope that it will be useful,
+- but WITHOUT ANY WARRANTY; without even the implied warranty of
+- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+- Lesser General Public License for more details.
+-
+- You should have received a copy of the GNU Lesser General Public
+- License along with the GNU C Library; if not, see
+- .
*/ +- +-#define STRCMP_SSE42 __strcasecmp_l_avx +-#define USE_AVX 1 +-#define USE_AS_STRCASECMP_L +-#include "strcmp-sse42.S" +diff --git a/sysdeps/x86_64/multiarch/strcmp-sse42.S b/sysdeps/x86_64/multiarch/strcmp-sse42.S +index a6825de8195ad8c6..466c6a92a612ebcb 100644 +--- a/sysdeps/x86_64/multiarch/strcmp-sse42.S ++++ b/sysdeps/x86_64/multiarch/strcmp-sse42.S +@@ -42,13 +42,8 @@ + # define UPDATE_STRNCMP_COUNTER + #endif + +-#ifdef USE_AVX +-# define SECTION avx +-# define GLABEL(l) l##_avx +-#else +-# define SECTION sse4.2 +-# define GLABEL(l) l##_sse42 +-#endif ++#define SECTION sse4.2 ++#define GLABEL(l) l##_sse42 + + #define LABEL(l) .L##l + +@@ -106,21 +101,7 @@ END (GLABEL(__strncasecmp)) + #endif + + +-#ifdef USE_AVX +-# define movdqa vmovdqa +-# define movdqu vmovdqu +-# define pmovmskb vpmovmskb +-# define pcmpistri vpcmpistri +-# define psubb vpsubb +-# define pcmpeqb vpcmpeqb +-# define psrldq vpsrldq +-# define pslldq vpslldq +-# define palignr vpalignr +-# define pxor vpxor +-# define D(arg) arg, arg +-#else +-# define D(arg) arg +-#endif ++#define arg arg + + STRCMP_SSE42: + cfi_startproc +@@ -192,18 +173,7 @@ LABEL(case_add): + movdqu (%rdi), %xmm1 + movdqu (%rsi), %xmm2 + #if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L +-# ifdef USE_AVX +-# define TOLOWER(reg1, reg2) \ +- vpaddb LCASE_MIN_reg, reg1, %xmm7; \ +- vpaddb LCASE_MIN_reg, reg2, %xmm8; \ +- vpcmpgtb LCASE_MAX_reg, %xmm7, %xmm7; \ +- vpcmpgtb LCASE_MAX_reg, %xmm8, %xmm8; \ +- vpandn CASE_ADD_reg, %xmm7, %xmm7; \ +- vpandn CASE_ADD_reg, %xmm8, %xmm8; \ +- vpaddb %xmm7, reg1, reg1; \ +- vpaddb %xmm8, reg2, reg2 +-# else +-# define TOLOWER(reg1, reg2) \ ++# define TOLOWER(reg1, reg2) \ + movdqa LCASE_MIN_reg, %xmm7; \ + movdqa LCASE_MIN_reg, %xmm8; \ + paddb reg1, %xmm7; \ +@@ -214,15 +184,15 @@ LABEL(case_add): + pandn CASE_ADD_reg, %xmm8; \ + paddb %xmm7, reg1; \ + paddb %xmm8, reg2 +-# endif ++ + TOLOWER (%xmm1, %xmm2) + #else + # define TOLOWER(reg1, reg2) + #endif +- pxor %xmm0, D(%xmm0) /* clear %xmm0 for null char checks */ +- pcmpeqb %xmm1, D(%xmm0) /* Any null chars? */ +- pcmpeqb %xmm2, D(%xmm1) /* compare first 16 bytes for equality */ +- psubb %xmm0, D(%xmm1) /* packed sub of comparison results*/ ++ pxor %xmm0, %xmm0 /* clear %xmm0 for null char checks */ ++ pcmpeqb %xmm1, %xmm0 /* Any null chars? */ ++ pcmpeqb %xmm2, %xmm1 /* compare first 16 bytes for equality */ ++ psubb %xmm0, %xmm1 /* packed sub of comparison results*/ + pmovmskb %xmm1, %edx + sub $0xffff, %edx /* if first 16 bytes are same, edx == 0xffff */ + jnz LABEL(less16bytes)/* If not, find different value or null char */ +@@ -246,7 +216,7 @@ LABEL(crosscache): + xor %r8d, %r8d + and $0xf, %ecx /* offset of rsi */ + and $0xf, %eax /* offset of rdi */ +- pxor %xmm0, D(%xmm0) /* clear %xmm0 for null char check */ ++ pxor %xmm0, %xmm0 /* clear %xmm0 for null char check */ + cmp %eax, %ecx + je LABEL(ashr_0) /* rsi and rdi relative offset same */ + ja LABEL(bigger) +@@ -260,7 +230,7 @@ LABEL(bigger): + sub %rcx, %r9 + lea LABEL(unaligned_table)(%rip), %r10 + movslq (%r10, %r9,4), %r9 +- pcmpeqb %xmm1, D(%xmm0) /* Any null chars? */ ++ pcmpeqb %xmm1, %xmm0 /* Any null chars? */ + lea (%r10, %r9), %r10 + _CET_NOTRACK jmp *%r10 /* jump to corresponding case */ + +@@ -273,15 +243,15 @@ LABEL(bigger): + LABEL(ashr_0): + + movdqa (%rsi), %xmm1 +- pcmpeqb %xmm1, D(%xmm0) /* Any null chars? */ ++ pcmpeqb %xmm1, %xmm0 /* Any null chars? 
*/ + #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L +- pcmpeqb (%rdi), D(%xmm1) /* compare 16 bytes for equality */ ++ pcmpeqb (%rdi), %xmm1 /* compare 16 bytes for equality */ + #else + movdqa (%rdi), %xmm2 + TOLOWER (%xmm1, %xmm2) +- pcmpeqb %xmm2, D(%xmm1) /* compare 16 bytes for equality */ ++ pcmpeqb %xmm2, %xmm1 /* compare 16 bytes for equality */ + #endif +- psubb %xmm0, D(%xmm1) /* packed sub of comparison results*/ ++ psubb %xmm0, %xmm1 /* packed sub of comparison results*/ + pmovmskb %xmm1, %r9d + shr %cl, %edx /* adjust 0xffff for offset */ + shr %cl, %r9d /* adjust for 16-byte offset */ +@@ -361,10 +331,10 @@ LABEL(ashr_0_exit_use): + */ + .p2align 4 + LABEL(ashr_1): +- pslldq $15, D(%xmm2) /* shift first string to align with second */ ++ pslldq $15, %xmm2 /* shift first string to align with second */ + TOLOWER (%xmm1, %xmm2) +- pcmpeqb %xmm1, D(%xmm2) /* compare 16 bytes for equality */ +- psubb %xmm0, D(%xmm2) /* packed sub of comparison results*/ ++ pcmpeqb %xmm1, %xmm2 /* compare 16 bytes for equality */ ++ psubb %xmm0, %xmm2 /* packed sub of comparison results*/ + pmovmskb %xmm2, %r9d + shr %cl, %edx /* adjust 0xffff for offset */ + shr %cl, %r9d /* adjust for 16-byte offset */ +@@ -392,7 +362,7 @@ LABEL(loop_ashr_1_use): + + LABEL(nibble_ashr_1_restart_use): + movdqa (%rdi, %rdx), %xmm0 +- palignr $1, -16(%rdi, %rdx), D(%xmm0) ++ palignr $1, -16(%rdi, %rdx), %xmm0 + #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L + pcmpistri $0x1a,(%rsi,%rdx), %xmm0 + #else +@@ -411,7 +381,7 @@ LABEL(nibble_ashr_1_restart_use): + jg LABEL(nibble_ashr_1_use) + + movdqa (%rdi, %rdx), %xmm0 +- palignr $1, -16(%rdi, %rdx), D(%xmm0) ++ palignr $1, -16(%rdi, %rdx), %xmm0 + #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L + pcmpistri $0x1a,(%rsi,%rdx), %xmm0 + #else +@@ -431,7 +401,7 @@ LABEL(nibble_ashr_1_restart_use): + LABEL(nibble_ashr_1_use): + sub $0x1000, %r10 + movdqa -16(%rdi, %rdx), %xmm0 +- psrldq $1, D(%xmm0) ++ psrldq $1, %xmm0 + pcmpistri $0x3a,%xmm0, %xmm0 + #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + cmp %r11, %rcx +@@ -449,10 +419,10 @@ LABEL(nibble_ashr_1_use): + */ + .p2align 4 + LABEL(ashr_2): +- pslldq $14, D(%xmm2) ++ pslldq $14, %xmm2 + TOLOWER (%xmm1, %xmm2) +- pcmpeqb %xmm1, D(%xmm2) +- psubb %xmm0, D(%xmm2) ++ pcmpeqb %xmm1, %xmm2 ++ psubb %xmm0, %xmm2 + pmovmskb %xmm2, %r9d + shr %cl, %edx + shr %cl, %r9d +@@ -480,7 +450,7 @@ LABEL(loop_ashr_2_use): + + LABEL(nibble_ashr_2_restart_use): + movdqa (%rdi, %rdx), %xmm0 +- palignr $2, -16(%rdi, %rdx), D(%xmm0) ++ palignr $2, -16(%rdi, %rdx), %xmm0 + #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L + pcmpistri $0x1a,(%rsi,%rdx), %xmm0 + #else +@@ -499,7 +469,7 @@ LABEL(nibble_ashr_2_restart_use): + jg LABEL(nibble_ashr_2_use) + + movdqa (%rdi, %rdx), %xmm0 +- palignr $2, -16(%rdi, %rdx), D(%xmm0) ++ palignr $2, -16(%rdi, %rdx), %xmm0 + #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L + pcmpistri $0x1a,(%rsi,%rdx), %xmm0 + #else +@@ -519,7 +489,7 @@ LABEL(nibble_ashr_2_restart_use): + LABEL(nibble_ashr_2_use): + sub $0x1000, %r10 + movdqa -16(%rdi, %rdx), %xmm0 +- psrldq $2, D(%xmm0) ++ psrldq $2, %xmm0 + pcmpistri $0x3a,%xmm0, %xmm0 + #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + cmp %r11, %rcx +@@ -537,10 +507,10 @@ LABEL(nibble_ashr_2_use): + */ + .p2align 4 + LABEL(ashr_3): +- pslldq $13, D(%xmm2) ++ pslldq $13, %xmm2 + TOLOWER (%xmm1, %xmm2) +- pcmpeqb %xmm1, D(%xmm2) +- psubb %xmm0, D(%xmm2) ++ pcmpeqb 
%xmm1, %xmm2 ++ psubb %xmm0, %xmm2 + pmovmskb %xmm2, %r9d + shr %cl, %edx + shr %cl, %r9d +@@ -568,7 +538,7 @@ LABEL(loop_ashr_3_use): + + LABEL(nibble_ashr_3_restart_use): + movdqa (%rdi, %rdx), %xmm0 +- palignr $3, -16(%rdi, %rdx), D(%xmm0) ++ palignr $3, -16(%rdi, %rdx), %xmm0 + #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L + pcmpistri $0x1a,(%rsi,%rdx), %xmm0 + #else +@@ -587,7 +557,7 @@ LABEL(nibble_ashr_3_restart_use): + jg LABEL(nibble_ashr_3_use) + + movdqa (%rdi, %rdx), %xmm0 +- palignr $3, -16(%rdi, %rdx), D(%xmm0) ++ palignr $3, -16(%rdi, %rdx), %xmm0 + #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L + pcmpistri $0x1a,(%rsi,%rdx), %xmm0 + #else +@@ -607,7 +577,7 @@ LABEL(nibble_ashr_3_restart_use): + LABEL(nibble_ashr_3_use): + sub $0x1000, %r10 + movdqa -16(%rdi, %rdx), %xmm0 +- psrldq $3, D(%xmm0) ++ psrldq $3, %xmm0 + pcmpistri $0x3a,%xmm0, %xmm0 + #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + cmp %r11, %rcx +@@ -625,10 +595,10 @@ LABEL(nibble_ashr_3_use): + */ + .p2align 4 + LABEL(ashr_4): +- pslldq $12, D(%xmm2) ++ pslldq $12, %xmm2 + TOLOWER (%xmm1, %xmm2) +- pcmpeqb %xmm1, D(%xmm2) +- psubb %xmm0, D(%xmm2) ++ pcmpeqb %xmm1, %xmm2 ++ psubb %xmm0, %xmm2 + pmovmskb %xmm2, %r9d + shr %cl, %edx + shr %cl, %r9d +@@ -657,7 +627,7 @@ LABEL(loop_ashr_4_use): + + LABEL(nibble_ashr_4_restart_use): + movdqa (%rdi, %rdx), %xmm0 +- palignr $4, -16(%rdi, %rdx), D(%xmm0) ++ palignr $4, -16(%rdi, %rdx), %xmm0 + #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L + pcmpistri $0x1a,(%rsi,%rdx), %xmm0 + #else +@@ -676,7 +646,7 @@ LABEL(nibble_ashr_4_restart_use): + jg LABEL(nibble_ashr_4_use) + + movdqa (%rdi, %rdx), %xmm0 +- palignr $4, -16(%rdi, %rdx), D(%xmm0) ++ palignr $4, -16(%rdi, %rdx), %xmm0 + #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L + pcmpistri $0x1a,(%rsi,%rdx), %xmm0 + #else +@@ -696,7 +666,7 @@ LABEL(nibble_ashr_4_restart_use): + LABEL(nibble_ashr_4_use): + sub $0x1000, %r10 + movdqa -16(%rdi, %rdx), %xmm0 +- psrldq $4, D(%xmm0) ++ psrldq $4, %xmm0 + pcmpistri $0x3a,%xmm0, %xmm0 + #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + cmp %r11, %rcx +@@ -714,10 +684,10 @@ LABEL(nibble_ashr_4_use): + */ + .p2align 4 + LABEL(ashr_5): +- pslldq $11, D(%xmm2) ++ pslldq $11, %xmm2 + TOLOWER (%xmm1, %xmm2) +- pcmpeqb %xmm1, D(%xmm2) +- psubb %xmm0, D(%xmm2) ++ pcmpeqb %xmm1, %xmm2 ++ psubb %xmm0, %xmm2 + pmovmskb %xmm2, %r9d + shr %cl, %edx + shr %cl, %r9d +@@ -746,7 +716,7 @@ LABEL(loop_ashr_5_use): + + LABEL(nibble_ashr_5_restart_use): + movdqa (%rdi, %rdx), %xmm0 +- palignr $5, -16(%rdi, %rdx), D(%xmm0) ++ palignr $5, -16(%rdi, %rdx), %xmm0 + #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L + pcmpistri $0x1a,(%rsi,%rdx), %xmm0 + #else +@@ -766,7 +736,7 @@ LABEL(nibble_ashr_5_restart_use): + + movdqa (%rdi, %rdx), %xmm0 + +- palignr $5, -16(%rdi, %rdx), D(%xmm0) ++ palignr $5, -16(%rdi, %rdx), %xmm0 + #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L + pcmpistri $0x1a,(%rsi,%rdx), %xmm0 + #else +@@ -786,7 +756,7 @@ LABEL(nibble_ashr_5_restart_use): + LABEL(nibble_ashr_5_use): + sub $0x1000, %r10 + movdqa -16(%rdi, %rdx), %xmm0 +- psrldq $5, D(%xmm0) ++ psrldq $5, %xmm0 + pcmpistri $0x3a,%xmm0, %xmm0 + #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + cmp %r11, %rcx +@@ -804,10 +774,10 @@ LABEL(nibble_ashr_5_use): + */ + .p2align 4 + LABEL(ashr_6): +- pslldq $10, D(%xmm2) ++ pslldq $10, %xmm2 + TOLOWER (%xmm1, %xmm2) +- pcmpeqb %xmm1, D(%xmm2) 
+- psubb %xmm0, D(%xmm2) ++ pcmpeqb %xmm1, %xmm2 ++ psubb %xmm0, %xmm2 + pmovmskb %xmm2, %r9d + shr %cl, %edx + shr %cl, %r9d +@@ -836,7 +806,7 @@ LABEL(loop_ashr_6_use): + + LABEL(nibble_ashr_6_restart_use): + movdqa (%rdi, %rdx), %xmm0 +- palignr $6, -16(%rdi, %rdx), D(%xmm0) ++ palignr $6, -16(%rdi, %rdx), %xmm0 + #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L + pcmpistri $0x1a,(%rsi,%rdx), %xmm0 + #else +@@ -855,7 +825,7 @@ LABEL(nibble_ashr_6_restart_use): + jg LABEL(nibble_ashr_6_use) + + movdqa (%rdi, %rdx), %xmm0 +- palignr $6, -16(%rdi, %rdx), D(%xmm0) ++ palignr $6, -16(%rdi, %rdx), %xmm0 + #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L + pcmpistri $0x1a,(%rsi,%rdx), %xmm0 + #else +@@ -875,7 +845,7 @@ LABEL(nibble_ashr_6_restart_use): + LABEL(nibble_ashr_6_use): + sub $0x1000, %r10 + movdqa -16(%rdi, %rdx), %xmm0 +- psrldq $6, D(%xmm0) ++ psrldq $6, %xmm0 + pcmpistri $0x3a,%xmm0, %xmm0 + #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + cmp %r11, %rcx +@@ -893,10 +863,10 @@ LABEL(nibble_ashr_6_use): + */ + .p2align 4 + LABEL(ashr_7): +- pslldq $9, D(%xmm2) ++ pslldq $9, %xmm2 + TOLOWER (%xmm1, %xmm2) +- pcmpeqb %xmm1, D(%xmm2) +- psubb %xmm0, D(%xmm2) ++ pcmpeqb %xmm1, %xmm2 ++ psubb %xmm0, %xmm2 + pmovmskb %xmm2, %r9d + shr %cl, %edx + shr %cl, %r9d +@@ -925,7 +895,7 @@ LABEL(loop_ashr_7_use): + + LABEL(nibble_ashr_7_restart_use): + movdqa (%rdi, %rdx), %xmm0 +- palignr $7, -16(%rdi, %rdx), D(%xmm0) ++ palignr $7, -16(%rdi, %rdx), %xmm0 + #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L + pcmpistri $0x1a,(%rsi,%rdx), %xmm0 + #else +@@ -944,7 +914,7 @@ LABEL(nibble_ashr_7_restart_use): + jg LABEL(nibble_ashr_7_use) + + movdqa (%rdi, %rdx), %xmm0 +- palignr $7, -16(%rdi, %rdx), D(%xmm0) ++ palignr $7, -16(%rdi, %rdx), %xmm0 + #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L + pcmpistri $0x1a,(%rsi,%rdx), %xmm0 + #else +@@ -964,7 +934,7 @@ LABEL(nibble_ashr_7_restart_use): + LABEL(nibble_ashr_7_use): + sub $0x1000, %r10 + movdqa -16(%rdi, %rdx), %xmm0 +- psrldq $7, D(%xmm0) ++ psrldq $7, %xmm0 + pcmpistri $0x3a,%xmm0, %xmm0 + #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + cmp %r11, %rcx +@@ -982,10 +952,10 @@ LABEL(nibble_ashr_7_use): + */ + .p2align 4 + LABEL(ashr_8): +- pslldq $8, D(%xmm2) ++ pslldq $8, %xmm2 + TOLOWER (%xmm1, %xmm2) +- pcmpeqb %xmm1, D(%xmm2) +- psubb %xmm0, D(%xmm2) ++ pcmpeqb %xmm1, %xmm2 ++ psubb %xmm0, %xmm2 + pmovmskb %xmm2, %r9d + shr %cl, %edx + shr %cl, %r9d +@@ -1014,7 +984,7 @@ LABEL(loop_ashr_8_use): + + LABEL(nibble_ashr_8_restart_use): + movdqa (%rdi, %rdx), %xmm0 +- palignr $8, -16(%rdi, %rdx), D(%xmm0) ++ palignr $8, -16(%rdi, %rdx), %xmm0 + #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L + pcmpistri $0x1a, (%rsi,%rdx), %xmm0 + #else +@@ -1033,7 +1003,7 @@ LABEL(nibble_ashr_8_restart_use): + jg LABEL(nibble_ashr_8_use) + + movdqa (%rdi, %rdx), %xmm0 +- palignr $8, -16(%rdi, %rdx), D(%xmm0) ++ palignr $8, -16(%rdi, %rdx), %xmm0 + #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L + pcmpistri $0x1a, (%rsi,%rdx), %xmm0 + #else +@@ -1053,7 +1023,7 @@ LABEL(nibble_ashr_8_restart_use): + LABEL(nibble_ashr_8_use): + sub $0x1000, %r10 + movdqa -16(%rdi, %rdx), %xmm0 +- psrldq $8, D(%xmm0) ++ psrldq $8, %xmm0 + pcmpistri $0x3a,%xmm0, %xmm0 + #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + cmp %r11, %rcx +@@ -1071,10 +1041,10 @@ LABEL(nibble_ashr_8_use): + */ + .p2align 4 + LABEL(ashr_9): +- pslldq $7, D(%xmm2) ++ 
pslldq $7, %xmm2 + TOLOWER (%xmm1, %xmm2) +- pcmpeqb %xmm1, D(%xmm2) +- psubb %xmm0, D(%xmm2) ++ pcmpeqb %xmm1, %xmm2 ++ psubb %xmm0, %xmm2 + pmovmskb %xmm2, %r9d + shr %cl, %edx + shr %cl, %r9d +@@ -1104,7 +1074,7 @@ LABEL(loop_ashr_9_use): + LABEL(nibble_ashr_9_restart_use): + movdqa (%rdi, %rdx), %xmm0 + +- palignr $9, -16(%rdi, %rdx), D(%xmm0) ++ palignr $9, -16(%rdi, %rdx), %xmm0 + #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L + pcmpistri $0x1a, (%rsi,%rdx), %xmm0 + #else +@@ -1123,7 +1093,7 @@ LABEL(nibble_ashr_9_restart_use): + jg LABEL(nibble_ashr_9_use) + + movdqa (%rdi, %rdx), %xmm0 +- palignr $9, -16(%rdi, %rdx), D(%xmm0) ++ palignr $9, -16(%rdi, %rdx), %xmm0 + #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L + pcmpistri $0x1a, (%rsi,%rdx), %xmm0 + #else +@@ -1143,7 +1113,7 @@ LABEL(nibble_ashr_9_restart_use): + LABEL(nibble_ashr_9_use): + sub $0x1000, %r10 + movdqa -16(%rdi, %rdx), %xmm0 +- psrldq $9, D(%xmm0) ++ psrldq $9, %xmm0 + pcmpistri $0x3a,%xmm0, %xmm0 + #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + cmp %r11, %rcx +@@ -1161,10 +1131,10 @@ LABEL(nibble_ashr_9_use): + */ + .p2align 4 + LABEL(ashr_10): +- pslldq $6, D(%xmm2) ++ pslldq $6, %xmm2 + TOLOWER (%xmm1, %xmm2) +- pcmpeqb %xmm1, D(%xmm2) +- psubb %xmm0, D(%xmm2) ++ pcmpeqb %xmm1, %xmm2 ++ psubb %xmm0, %xmm2 + pmovmskb %xmm2, %r9d + shr %cl, %edx + shr %cl, %r9d +@@ -1193,7 +1163,7 @@ LABEL(loop_ashr_10_use): + + LABEL(nibble_ashr_10_restart_use): + movdqa (%rdi, %rdx), %xmm0 +- palignr $10, -16(%rdi, %rdx), D(%xmm0) ++ palignr $10, -16(%rdi, %rdx), %xmm0 + #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L + pcmpistri $0x1a, (%rsi,%rdx), %xmm0 + #else +@@ -1212,7 +1182,7 @@ LABEL(nibble_ashr_10_restart_use): + jg LABEL(nibble_ashr_10_use) + + movdqa (%rdi, %rdx), %xmm0 +- palignr $10, -16(%rdi, %rdx), D(%xmm0) ++ palignr $10, -16(%rdi, %rdx), %xmm0 + #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L + pcmpistri $0x1a, (%rsi,%rdx), %xmm0 + #else +@@ -1232,7 +1202,7 @@ LABEL(nibble_ashr_10_restart_use): + LABEL(nibble_ashr_10_use): + sub $0x1000, %r10 + movdqa -16(%rdi, %rdx), %xmm0 +- psrldq $10, D(%xmm0) ++ psrldq $10, %xmm0 + pcmpistri $0x3a,%xmm0, %xmm0 + #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + cmp %r11, %rcx +@@ -1250,10 +1220,10 @@ LABEL(nibble_ashr_10_use): + */ + .p2align 4 + LABEL(ashr_11): +- pslldq $5, D(%xmm2) ++ pslldq $5, %xmm2 + TOLOWER (%xmm1, %xmm2) +- pcmpeqb %xmm1, D(%xmm2) +- psubb %xmm0, D(%xmm2) ++ pcmpeqb %xmm1, %xmm2 ++ psubb %xmm0, %xmm2 + pmovmskb %xmm2, %r9d + shr %cl, %edx + shr %cl, %r9d +@@ -1282,7 +1252,7 @@ LABEL(loop_ashr_11_use): + + LABEL(nibble_ashr_11_restart_use): + movdqa (%rdi, %rdx), %xmm0 +- palignr $11, -16(%rdi, %rdx), D(%xmm0) ++ palignr $11, -16(%rdi, %rdx), %xmm0 + #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L + pcmpistri $0x1a, (%rsi,%rdx), %xmm0 + #else +@@ -1301,7 +1271,7 @@ LABEL(nibble_ashr_11_restart_use): + jg LABEL(nibble_ashr_11_use) + + movdqa (%rdi, %rdx), %xmm0 +- palignr $11, -16(%rdi, %rdx), D(%xmm0) ++ palignr $11, -16(%rdi, %rdx), %xmm0 + #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L + pcmpistri $0x1a, (%rsi,%rdx), %xmm0 + #else +@@ -1321,7 +1291,7 @@ LABEL(nibble_ashr_11_restart_use): + LABEL(nibble_ashr_11_use): + sub $0x1000, %r10 + movdqa -16(%rdi, %rdx), %xmm0 +- psrldq $11, D(%xmm0) ++ psrldq $11, %xmm0 + pcmpistri $0x3a,%xmm0, %xmm0 + #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + cmp %r11, 
%rcx +@@ -1339,10 +1309,10 @@ LABEL(nibble_ashr_11_use): + */ + .p2align 4 + LABEL(ashr_12): +- pslldq $4, D(%xmm2) ++ pslldq $4, %xmm2 + TOLOWER (%xmm1, %xmm2) +- pcmpeqb %xmm1, D(%xmm2) +- psubb %xmm0, D(%xmm2) ++ pcmpeqb %xmm1, %xmm2 ++ psubb %xmm0, %xmm2 + pmovmskb %xmm2, %r9d + shr %cl, %edx + shr %cl, %r9d +@@ -1371,7 +1341,7 @@ LABEL(loop_ashr_12_use): + + LABEL(nibble_ashr_12_restart_use): + movdqa (%rdi, %rdx), %xmm0 +- palignr $12, -16(%rdi, %rdx), D(%xmm0) ++ palignr $12, -16(%rdi, %rdx), %xmm0 + #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L + pcmpistri $0x1a, (%rsi,%rdx), %xmm0 + #else +@@ -1390,7 +1360,7 @@ LABEL(nibble_ashr_12_restart_use): + jg LABEL(nibble_ashr_12_use) + + movdqa (%rdi, %rdx), %xmm0 +- palignr $12, -16(%rdi, %rdx), D(%xmm0) ++ palignr $12, -16(%rdi, %rdx), %xmm0 + #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L + pcmpistri $0x1a, (%rsi,%rdx), %xmm0 + #else +@@ -1410,7 +1380,7 @@ LABEL(nibble_ashr_12_restart_use): + LABEL(nibble_ashr_12_use): + sub $0x1000, %r10 + movdqa -16(%rdi, %rdx), %xmm0 +- psrldq $12, D(%xmm0) ++ psrldq $12, %xmm0 + pcmpistri $0x3a,%xmm0, %xmm0 + #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + cmp %r11, %rcx +@@ -1428,10 +1398,10 @@ LABEL(nibble_ashr_12_use): + */ + .p2align 4 + LABEL(ashr_13): +- pslldq $3, D(%xmm2) ++ pslldq $3, %xmm2 + TOLOWER (%xmm1, %xmm2) +- pcmpeqb %xmm1, D(%xmm2) +- psubb %xmm0, D(%xmm2) ++ pcmpeqb %xmm1, %xmm2 ++ psubb %xmm0, %xmm2 + pmovmskb %xmm2, %r9d + shr %cl, %edx + shr %cl, %r9d +@@ -1461,7 +1431,7 @@ LABEL(loop_ashr_13_use): + + LABEL(nibble_ashr_13_restart_use): + movdqa (%rdi, %rdx), %xmm0 +- palignr $13, -16(%rdi, %rdx), D(%xmm0) ++ palignr $13, -16(%rdi, %rdx), %xmm0 + #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L + pcmpistri $0x1a, (%rsi,%rdx), %xmm0 + #else +@@ -1480,7 +1450,7 @@ LABEL(nibble_ashr_13_restart_use): + jg LABEL(nibble_ashr_13_use) + + movdqa (%rdi, %rdx), %xmm0 +- palignr $13, -16(%rdi, %rdx), D(%xmm0) ++ palignr $13, -16(%rdi, %rdx), %xmm0 + #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L + pcmpistri $0x1a, (%rsi,%rdx), %xmm0 + #else +@@ -1500,7 +1470,7 @@ LABEL(nibble_ashr_13_restart_use): + LABEL(nibble_ashr_13_use): + sub $0x1000, %r10 + movdqa -16(%rdi, %rdx), %xmm0 +- psrldq $13, D(%xmm0) ++ psrldq $13, %xmm0 + pcmpistri $0x3a,%xmm0, %xmm0 + #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + cmp %r11, %rcx +@@ -1518,10 +1488,10 @@ LABEL(nibble_ashr_13_use): + */ + .p2align 4 + LABEL(ashr_14): +- pslldq $2, D(%xmm2) ++ pslldq $2, %xmm2 + TOLOWER (%xmm1, %xmm2) +- pcmpeqb %xmm1, D(%xmm2) +- psubb %xmm0, D(%xmm2) ++ pcmpeqb %xmm1, %xmm2 ++ psubb %xmm0, %xmm2 + pmovmskb %xmm2, %r9d + shr %cl, %edx + shr %cl, %r9d +@@ -1551,7 +1521,7 @@ LABEL(loop_ashr_14_use): + + LABEL(nibble_ashr_14_restart_use): + movdqa (%rdi, %rdx), %xmm0 +- palignr $14, -16(%rdi, %rdx), D(%xmm0) ++ palignr $14, -16(%rdi, %rdx), %xmm0 + #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L + pcmpistri $0x1a, (%rsi,%rdx), %xmm0 + #else +@@ -1570,7 +1540,7 @@ LABEL(nibble_ashr_14_restart_use): + jg LABEL(nibble_ashr_14_use) + + movdqa (%rdi, %rdx), %xmm0 +- palignr $14, -16(%rdi, %rdx), D(%xmm0) ++ palignr $14, -16(%rdi, %rdx), %xmm0 + #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L + pcmpistri $0x1a, (%rsi,%rdx), %xmm0 + #else +@@ -1590,7 +1560,7 @@ LABEL(nibble_ashr_14_restart_use): + LABEL(nibble_ashr_14_use): + sub $0x1000, %r10 + movdqa -16(%rdi, %rdx), %xmm0 +- psrldq $14, 
D(%xmm0)
++ psrldq $14, %xmm0
+ pcmpistri $0x3a,%xmm0, %xmm0
+ #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
+ cmp %r11, %rcx
+@@ -1608,10 +1578,10 @@ LABEL(nibble_ashr_14_use):
+ */
+ .p2align 4
+ LABEL(ashr_15):
+- pslldq $1, D(%xmm2)
++ pslldq $1, %xmm2
+ TOLOWER (%xmm1, %xmm2)
+- pcmpeqb %xmm1, D(%xmm2)
+- psubb %xmm0, D(%xmm2)
++ pcmpeqb %xmm1, %xmm2
++ psubb %xmm0, %xmm2
+ pmovmskb %xmm2, %r9d
+ shr %cl, %edx
+ shr %cl, %r9d
+@@ -1643,7 +1613,7 @@ LABEL(loop_ashr_15_use):
+
+ LABEL(nibble_ashr_15_restart_use):
+ movdqa (%rdi, %rdx), %xmm0
+- palignr $15, -16(%rdi, %rdx), D(%xmm0)
++ palignr $15, -16(%rdi, %rdx), %xmm0
+ #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
+ pcmpistri $0x1a, (%rsi,%rdx), %xmm0
+ #else
+@@ -1662,7 +1632,7 @@ LABEL(nibble_ashr_15_restart_use):
+ jg LABEL(nibble_ashr_15_use)
+
+ movdqa (%rdi, %rdx), %xmm0
+- palignr $15, -16(%rdi, %rdx), D(%xmm0)
++ palignr $15, -16(%rdi, %rdx), %xmm0
+ #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
+ pcmpistri $0x1a, (%rsi,%rdx), %xmm0
+ #else
+@@ -1682,7 +1652,7 @@ LABEL(nibble_ashr_15_restart_use):
+ LABEL(nibble_ashr_15_use):
+ sub $0x1000, %r10
+ movdqa -16(%rdi, %rdx), %xmm0
+- psrldq $15, D(%xmm0)
++ psrldq $15, %xmm0
+ pcmpistri $0x3a,%xmm0, %xmm0
+ #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
+ cmp %r11, %rcx
+diff --git a/sysdeps/x86_64/multiarch/strncase_l-avx.S b/sysdeps/x86_64/multiarch/strncase_l-avx.S
+deleted file mode 100644
+index f1d3fefdd94674b8..0000000000000000
+--- a/sysdeps/x86_64/multiarch/strncase_l-avx.S
++++ /dev/null
+@@ -1,22 +0,0 @@
+-/* strncasecmp_l optimized with AVX.
+- Copyright (C) 2017-2021 Free Software Foundation, Inc.
+- This file is part of the GNU C Library.
+-
+- The GNU C Library is free software; you can redistribute it and/or
+- modify it under the terms of the GNU Lesser General Public
+- License as published by the Free Software Foundation; either
+- version 2.1 of the License, or (at your option) any later version.
+-
+- The GNU C Library is distributed in the hope that it will be useful,
+- but WITHOUT ANY WARRANTY; without even the implied warranty of
+- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+- Lesser General Public License for more details.
+-
+- You should have received a copy of the GNU Lesser General Public
+- License along with the GNU C Library; if not, see
+- . */
+-
+-#define STRCMP_SSE42 __strncasecmp_l_avx
+-#define USE_AVX 1
+-#define USE_AS_STRNCASECMP_L
+-#include "strcmp-sse42.S"
diff --git a/SOURCES/glibc-upstream-2.34-230.patch b/SOURCES/glibc-upstream-2.34-230.patch
new file mode 100644
index 0000000..b7eb594
--- /dev/null
+++ b/SOURCES/glibc-upstream-2.34-230.patch
@@ -0,0 +1,253 @@
+commit 4ff6ae069b7caacd5f99088abd755717b994f660
+Author: Noah Goldstein
+Date: Fri Mar 25 17:13:33 2022 -0500
+
+ x86: Small improvements for wcslen
+
+ Just a few QOL changes.
+ 1. Prefer `add` over `lea`, as `add` can run on more
+ execution units.
+ 2. Don't break macro-fusion between `test` and `jcc`.
+ 3. Reduce code size by removing gratuitous padding bytes (-90
+ bytes).
+
+ geometric_mean(N=20) of all benchmarks New / Original: 0.959
+
+ All string/memory tests pass.
+ Reviewed-by: H.J.
Lu + + (cherry picked from commit 244b415d386487521882debb845a040a4758cb18) + +diff --git a/sysdeps/x86_64/wcslen.S b/sysdeps/x86_64/wcslen.S +index 61edea1d14d454c6..ad066863a44ea0a5 100644 +--- a/sysdeps/x86_64/wcslen.S ++++ b/sysdeps/x86_64/wcslen.S +@@ -41,82 +41,82 @@ ENTRY (__wcslen) + pxor %xmm0, %xmm0 + + lea 32(%rdi), %rax +- lea 16(%rdi), %rcx ++ addq $16, %rdi + and $-16, %rax + + pcmpeqd (%rax), %xmm0 + pmovmskb %xmm0, %edx + pxor %xmm1, %xmm1 ++ addq $16, %rax + test %edx, %edx +- lea 16(%rax), %rax + jnz L(exit) + + pcmpeqd (%rax), %xmm1 + pmovmskb %xmm1, %edx + pxor %xmm2, %xmm2 ++ addq $16, %rax + test %edx, %edx +- lea 16(%rax), %rax + jnz L(exit) + + pcmpeqd (%rax), %xmm2 + pmovmskb %xmm2, %edx + pxor %xmm3, %xmm3 ++ addq $16, %rax + test %edx, %edx +- lea 16(%rax), %rax + jnz L(exit) + + pcmpeqd (%rax), %xmm3 + pmovmskb %xmm3, %edx ++ addq $16, %rax + test %edx, %edx +- lea 16(%rax), %rax + jnz L(exit) + + pcmpeqd (%rax), %xmm0 + pmovmskb %xmm0, %edx ++ addq $16, %rax + test %edx, %edx +- lea 16(%rax), %rax + jnz L(exit) + + pcmpeqd (%rax), %xmm1 + pmovmskb %xmm1, %edx ++ addq $16, %rax + test %edx, %edx +- lea 16(%rax), %rax + jnz L(exit) + + pcmpeqd (%rax), %xmm2 + pmovmskb %xmm2, %edx ++ addq $16, %rax + test %edx, %edx +- lea 16(%rax), %rax + jnz L(exit) + + pcmpeqd (%rax), %xmm3 + pmovmskb %xmm3, %edx ++ addq $16, %rax + test %edx, %edx +- lea 16(%rax), %rax + jnz L(exit) + + pcmpeqd (%rax), %xmm0 + pmovmskb %xmm0, %edx ++ addq $16, %rax + test %edx, %edx +- lea 16(%rax), %rax + jnz L(exit) + + pcmpeqd (%rax), %xmm1 + pmovmskb %xmm1, %edx ++ addq $16, %rax + test %edx, %edx +- lea 16(%rax), %rax + jnz L(exit) + + pcmpeqd (%rax), %xmm2 + pmovmskb %xmm2, %edx ++ addq $16, %rax + test %edx, %edx +- lea 16(%rax), %rax + jnz L(exit) + + pcmpeqd (%rax), %xmm3 + pmovmskb %xmm3, %edx ++ addq $16, %rax + test %edx, %edx +- lea 16(%rax), %rax + jnz L(exit) + + and $-0x40, %rax +@@ -133,104 +133,100 @@ L(aligned_64_loop): + pminub %xmm0, %xmm2 + pcmpeqd %xmm3, %xmm2 + pmovmskb %xmm2, %edx ++ addq $64, %rax + test %edx, %edx +- lea 64(%rax), %rax + jz L(aligned_64_loop) + + pcmpeqd -64(%rax), %xmm3 + pmovmskb %xmm3, %edx ++ addq $48, %rdi + test %edx, %edx +- lea 48(%rcx), %rcx + jnz L(exit) + + pcmpeqd %xmm1, %xmm3 + pmovmskb %xmm3, %edx ++ addq $-16, %rdi + test %edx, %edx +- lea -16(%rcx), %rcx + jnz L(exit) + + pcmpeqd -32(%rax), %xmm3 + pmovmskb %xmm3, %edx ++ addq $-16, %rdi + test %edx, %edx +- lea -16(%rcx), %rcx + jnz L(exit) + + pcmpeqd %xmm6, %xmm3 + pmovmskb %xmm3, %edx ++ addq $-16, %rdi + test %edx, %edx +- lea -16(%rcx), %rcx +- jnz L(exit) +- +- jmp L(aligned_64_loop) ++ jz L(aligned_64_loop) + + .p2align 4 + L(exit): +- sub %rcx, %rax ++ sub %rdi, %rax + shr $2, %rax + test %dl, %dl + jz L(exit_high) + +- mov %dl, %cl +- and $15, %cl ++ andl $15, %edx + jz L(exit_1) + ret + +- .p2align 4 ++ /* No align here. Naturally aligned % 16 == 1. 
*/
+ L(exit_high):
+- mov %dh, %ch
+- and $15, %ch
++ andl $(15 << 8), %edx
+ jz L(exit_3)
+ add $2, %rax
+ ret
+
+- .p2align 4
++ .p2align 3
+ L(exit_1):
+ add $1, %rax
+ ret
+
+- .p2align 4
++ .p2align 3
+ L(exit_3):
+ add $3, %rax
+ ret
+
+- .p2align 4
++ .p2align 3
+ L(exit_tail0):
+- xor %rax, %rax
++ xorl %eax, %eax
+ ret
+
+- .p2align 4
++ .p2align 3
+ L(exit_tail1):
+- mov $1, %rax
++ movl $1, %eax
+ ret
+
+- .p2align 4
++ .p2align 3
+ L(exit_tail2):
+- mov $2, %rax
++ movl $2, %eax
+ ret
+
+- .p2align 4
++ .p2align 3
+ L(exit_tail3):
+- mov $3, %rax
++ movl $3, %eax
+ ret
+
+- .p2align 4
++ .p2align 3
+ L(exit_tail4):
+- mov $4, %rax
++ movl $4, %eax
+ ret
+
+- .p2align 4
++ .p2align 3
+ L(exit_tail5):
+- mov $5, %rax
++ movl $5, %eax
+ ret
+
+- .p2align 4
++ .p2align 3
+ L(exit_tail6):
+- mov $6, %rax
++ movl $6, %eax
+ ret
+
+- .p2align 4
++ .p2align 3
+ L(exit_tail7):
+- mov $7, %rax
++ movl $7, %eax
+ ret
+
+ END (__wcslen)
diff --git a/SOURCES/glibc-upstream-2.34-231.patch b/SOURCES/glibc-upstream-2.34-231.patch
new file mode 100644
index 0000000..3c928b8
--- /dev/null
+++ b/SOURCES/glibc-upstream-2.34-231.patch
@@ -0,0 +1,956 @@
+commit ffe75982cc0bb2d25d55ed566a3731b9c3017e6f
+Author: Noah Goldstein
+Date: Fri Apr 15 12:28:00 2022 -0500
+
+ x86: Remove memcmp-sse4.S
+
+ Code didn't actually use any sse4 instructions since `ptest` was
+ removed in:
+
+ commit 2f9062d7171850451e6044ef78d91ff8c017b9c0
+ Author: Noah Goldstein
+ Date: Wed Nov 10 16:18:56 2021 -0600
+
+ x86: Shrink memcmp-sse4.S code size
+
+ The new memcmp-sse2 implementation is also faster.
+
+ geometric_mean(N=20) of page cross cases SSE2 / SSE4: 0.905
+
+ Note there are two regressions preferring SSE2 for Size = 1 and Size =
+ 65.
+
+ Size = 1:
+ size, align0, align1, ret, New Time/Old Time
+ 1, 1, 1, 0, 1.2
+ 1, 1, 1, 1, 1.197
+ 1, 1, 1, -1, 1.2
+
+ This is intentional. Size == 1 is significantly less hot based on
+ profiles of GCC11 and Python3 than sizes [4, 8] (which is made
+ hotter).
+
+ Python3 Size = 1 -> 13.64%
+ Python3 Size = [4, 8] -> 60.92%
+
+ GCC11 Size = 1 -> 1.29%
+ GCC11 Size = [4, 8] -> 33.86%
+
+ size, align0, align1, ret, New Time/Old Time
+ 4, 4, 4, 0, 0.622
+ 4, 4, 4, 1, 0.797
+ 4, 4, 4, -1, 0.805
+ 5, 5, 5, 0, 0.623
+ 5, 5, 5, 1, 0.777
+ 5, 5, 5, -1, 0.802
+ 6, 6, 6, 0, 0.625
+ 6, 6, 6, 1, 0.813
+ 6, 6, 6, -1, 0.788
+ 7, 7, 7, 0, 0.625
+ 7, 7, 7, 1, 0.799
+ 7, 7, 7, -1, 0.795
+ 8, 8, 8, 0, 0.625
+ 8, 8, 8, 1, 0.848
+ 8, 8, 8, -1, 0.914
+ 9, 9, 9, 0, 0.625
+
+ Size = 65:
+ size, align0, align1, ret, New Time/Old Time
+ 65, 0, 0, 0, 1.103
+ 65, 0, 0, 1, 1.216
+ 65, 0, 0, -1, 1.227
+ 65, 65, 0, 0, 1.091
+ 65, 0, 65, 1, 1.19
+ 65, 65, 65, -1, 1.215
+
+ This is because A) the checks in range [65, 96] are now unrolled 2x
+ and B) smaller values <= 16 are now given a hotter path. By
+ contrast the SSE4 version has a branch for Size = 80. The unrolled
+ version gets better performance for returns which need both
+ comparisons.
+
+ size, align0, align1, ret, New Time/Old Time
+ 128, 4, 8, 0, 0.858
+ 128, 4, 8, 1, 0.879
+ 128, 4, 8, -1, 0.888
+
+ As well, outside of microbenchmark environments, where branches are
+ not fully predictable, the branch will have a real cost.
+ Reviewed-by: H.J.
Lu + + (cherry picked from commit 7cbc03d03091d5664060924789afe46d30a5477e) + +diff --git a/sysdeps/x86_64/multiarch/Makefile b/sysdeps/x86_64/multiarch/Makefile +index bca82e38d86cc440..b503e4b81e92a11c 100644 +--- a/sysdeps/x86_64/multiarch/Makefile ++++ b/sysdeps/x86_64/multiarch/Makefile +@@ -11,7 +11,6 @@ sysdep_routines += \ + memcmp-avx2-movbe-rtm \ + memcmp-evex-movbe \ + memcmp-sse2 \ +- memcmp-sse4 \ + memcmp-ssse3 \ + memcpy-ssse3 \ + memcpy-ssse3-back \ +@@ -174,7 +173,6 @@ sysdep_routines += \ + wmemcmp-avx2-movbe-rtm \ + wmemcmp-c \ + wmemcmp-evex-movbe \ +- wmemcmp-sse4 \ + wmemcmp-ssse3 \ + # sysdep_routines + endif +diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c +index 4c7834dd0b951fa4..e5e48b36c3175e68 100644 +--- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c ++++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c +@@ -78,8 +78,6 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, + && CPU_FEATURE_USABLE (BMI2) + && CPU_FEATURE_USABLE (MOVBE)), + __memcmp_evex_movbe) +- IFUNC_IMPL_ADD (array, i, memcmp, CPU_FEATURE_USABLE (SSE4_1), +- __memcmp_sse4_1) + IFUNC_IMPL_ADD (array, i, memcmp, CPU_FEATURE_USABLE (SSSE3), + __memcmp_ssse3) + IFUNC_IMPL_ADD (array, i, memcmp, 1, __memcmp_sse2)) +@@ -824,8 +822,6 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, + && CPU_FEATURE_USABLE (BMI2) + && CPU_FEATURE_USABLE (MOVBE)), + __wmemcmp_evex_movbe) +- IFUNC_IMPL_ADD (array, i, wmemcmp, CPU_FEATURE_USABLE (SSE4_1), +- __wmemcmp_sse4_1) + IFUNC_IMPL_ADD (array, i, wmemcmp, CPU_FEATURE_USABLE (SSSE3), + __wmemcmp_ssse3) + IFUNC_IMPL_ADD (array, i, wmemcmp, 1, __wmemcmp_sse2)) +diff --git a/sysdeps/x86_64/multiarch/ifunc-memcmp.h b/sysdeps/x86_64/multiarch/ifunc-memcmp.h +index 89e2129968e1e49c..5b92594093c1e0bb 100644 +--- a/sysdeps/x86_64/multiarch/ifunc-memcmp.h ++++ b/sysdeps/x86_64/multiarch/ifunc-memcmp.h +@@ -21,7 +21,6 @@ + + extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden; + extern __typeof (REDIRECT_NAME) OPTIMIZE (ssse3) attribute_hidden; +-extern __typeof (REDIRECT_NAME) OPTIMIZE (sse4_1) attribute_hidden; + extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_movbe) attribute_hidden; + extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_movbe_rtm) attribute_hidden; + extern __typeof (REDIRECT_NAME) OPTIMIZE (evex_movbe) attribute_hidden; +@@ -47,9 +46,6 @@ IFUNC_SELECTOR (void) + return OPTIMIZE (avx2_movbe); + } + +- if (CPU_FEATURE_USABLE_P (cpu_features, SSE4_1)) +- return OPTIMIZE (sse4_1); +- + if (CPU_FEATURE_USABLE_P (cpu_features, SSSE3)) + return OPTIMIZE (ssse3); + +diff --git a/sysdeps/x86_64/multiarch/memcmp-sse4.S b/sysdeps/x86_64/multiarch/memcmp-sse4.S +deleted file mode 100644 +index 97c102a9c5ab2b91..0000000000000000 +--- a/sysdeps/x86_64/multiarch/memcmp-sse4.S ++++ /dev/null +@@ -1,804 +0,0 @@ +-/* memcmp with SSE4.1, wmemcmp with SSE4.1 +- Copyright (C) 2010-2021 Free Software Foundation, Inc. +- Contributed by Intel Corporation. +- This file is part of the GNU C Library. +- +- The GNU C Library is free software; you can redistribute it and/or +- modify it under the terms of the GNU Lesser General Public +- License as published by the Free Software Foundation; either +- version 2.1 of the License, or (at your option) any later version. +- +- The GNU C Library is distributed in the hope that it will be useful, +- but WITHOUT ANY WARRANTY; without even the implied warranty of +- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU +- Lesser General Public License for more details. +- +- You should have received a copy of the GNU Lesser General Public +- License along with the GNU C Library; if not, see +- . */ +- +-#if IS_IN (libc) +- +-# include +- +-# ifndef MEMCMP +-# define MEMCMP __memcmp_sse4_1 +-# endif +- +-#ifdef USE_AS_WMEMCMP +-# define CMPEQ pcmpeqd +-# define CHAR_SIZE 4 +-#else +-# define CMPEQ pcmpeqb +-# define CHAR_SIZE 1 +-#endif +- +- +-/* Warning! +- wmemcmp has to use SIGNED comparison for elements. +- memcmp has to use UNSIGNED comparison for elemnts. +-*/ +- +- .section .text.sse4.1,"ax",@progbits +-ENTRY (MEMCMP) +-# ifdef USE_AS_WMEMCMP +- shl $2, %RDX_LP +-# elif defined __ILP32__ +- /* Clear the upper 32 bits. */ +- mov %edx, %edx +-# endif +- cmp $79, %RDX_LP +- ja L(79bytesormore) +- +- cmp $CHAR_SIZE, %RDX_LP +- jbe L(firstbyte) +- +- /* N in (CHAR_SIZE, 79) bytes. */ +- cmpl $32, %edx +- ja L(more_32_bytes) +- +- cmpl $16, %edx +- jae L(16_to_32_bytes) +- +-# ifndef USE_AS_WMEMCMP +- cmpl $8, %edx +- jae L(8_to_16_bytes) +- +- cmpl $4, %edx +- jb L(2_to_3_bytes) +- +- movl (%rdi), %eax +- movl (%rsi), %ecx +- +- bswap %eax +- bswap %ecx +- +- shlq $32, %rax +- shlq $32, %rcx +- +- movl -4(%rdi, %rdx), %edi +- movl -4(%rsi, %rdx), %esi +- +- bswap %edi +- bswap %esi +- +- orq %rdi, %rax +- orq %rsi, %rcx +- subq %rcx, %rax +- cmovne %edx, %eax +- sbbl %ecx, %ecx +- orl %ecx, %eax +- ret +- +- .p2align 4,, 8 +-L(2_to_3_bytes): +- movzwl (%rdi), %eax +- movzwl (%rsi), %ecx +- shll $8, %eax +- shll $8, %ecx +- bswap %eax +- bswap %ecx +- movzbl -1(%rdi, %rdx), %edi +- movzbl -1(%rsi, %rdx), %esi +- orl %edi, %eax +- orl %esi, %ecx +- subl %ecx, %eax +- ret +- +- .p2align 4,, 8 +-L(8_to_16_bytes): +- movq (%rdi), %rax +- movq (%rsi), %rcx +- +- bswap %rax +- bswap %rcx +- +- subq %rcx, %rax +- jne L(8_to_16_bytes_done) +- +- movq -8(%rdi, %rdx), %rax +- movq -8(%rsi, %rdx), %rcx +- +- bswap %rax +- bswap %rcx +- +- subq %rcx, %rax +- +-L(8_to_16_bytes_done): +- cmovne %edx, %eax +- sbbl %ecx, %ecx +- orl %ecx, %eax +- ret +-# else +- xorl %eax, %eax +- movl (%rdi), %ecx +- cmpl (%rsi), %ecx +- jne L(8_to_16_bytes_done) +- movl 4(%rdi), %ecx +- cmpl 4(%rsi), %ecx +- jne L(8_to_16_bytes_done) +- movl -4(%rdi, %rdx), %ecx +- cmpl -4(%rsi, %rdx), %ecx +- jne L(8_to_16_bytes_done) +- ret +-# endif +- +- .p2align 4,, 3 +-L(ret_zero): +- xorl %eax, %eax +-L(zero): +- ret +- +- .p2align 4,, 8 +-L(firstbyte): +- jb L(ret_zero) +-# ifdef USE_AS_WMEMCMP +- xorl %eax, %eax +- movl (%rdi), %ecx +- cmpl (%rsi), %ecx +- je L(zero) +-L(8_to_16_bytes_done): +- setg %al +- leal -1(%rax, %rax), %eax +-# else +- movzbl (%rdi), %eax +- movzbl (%rsi), %ecx +- sub %ecx, %eax +-# endif +- ret +- +- .p2align 4 +-L(vec_return_begin_48): +- addq $16, %rdi +- addq $16, %rsi +-L(vec_return_begin_32): +- bsfl %eax, %eax +-# ifdef USE_AS_WMEMCMP +- movl 32(%rdi, %rax), %ecx +- xorl %edx, %edx +- cmpl 32(%rsi, %rax), %ecx +- setg %dl +- leal -1(%rdx, %rdx), %eax +-# else +- movzbl 32(%rsi, %rax), %ecx +- movzbl 32(%rdi, %rax), %eax +- subl %ecx, %eax +-# endif +- ret +- +- .p2align 4 +-L(vec_return_begin_16): +- addq $16, %rdi +- addq $16, %rsi +-L(vec_return_begin): +- bsfl %eax, %eax +-# ifdef USE_AS_WMEMCMP +- movl (%rdi, %rax), %ecx +- xorl %edx, %edx +- cmpl (%rsi, %rax), %ecx +- setg %dl +- leal -1(%rdx, %rdx), %eax +-# else +- movzbl (%rsi, %rax), %ecx +- movzbl (%rdi, %rax), %eax +- subl %ecx, %eax +-# endif +- ret +- +- .p2align 4 +-L(vec_return_end_16): +- subl $16, %edx +-L(vec_return_end): +- bsfl 
%eax, %eax +- addl %edx, %eax +-# ifdef USE_AS_WMEMCMP +- movl -16(%rdi, %rax), %ecx +- xorl %edx, %edx +- cmpl -16(%rsi, %rax), %ecx +- setg %dl +- leal -1(%rdx, %rdx), %eax +-# else +- movzbl -16(%rsi, %rax), %ecx +- movzbl -16(%rdi, %rax), %eax +- subl %ecx, %eax +-# endif +- ret +- +- .p2align 4,, 8 +-L(more_32_bytes): +- movdqu (%rdi), %xmm0 +- movdqu (%rsi), %xmm1 +- CMPEQ %xmm0, %xmm1 +- pmovmskb %xmm1, %eax +- incw %ax +- jnz L(vec_return_begin) +- +- movdqu 16(%rdi), %xmm0 +- movdqu 16(%rsi), %xmm1 +- CMPEQ %xmm0, %xmm1 +- pmovmskb %xmm1, %eax +- incw %ax +- jnz L(vec_return_begin_16) +- +- cmpl $64, %edx +- jbe L(32_to_64_bytes) +- movdqu 32(%rdi), %xmm0 +- movdqu 32(%rsi), %xmm1 +- CMPEQ %xmm0, %xmm1 +- pmovmskb %xmm1, %eax +- incw %ax +- jnz L(vec_return_begin_32) +- +- .p2align 4,, 6 +-L(32_to_64_bytes): +- movdqu -32(%rdi, %rdx), %xmm0 +- movdqu -32(%rsi, %rdx), %xmm1 +- CMPEQ %xmm0, %xmm1 +- pmovmskb %xmm1, %eax +- incw %ax +- jnz L(vec_return_end_16) +- +- movdqu -16(%rdi, %rdx), %xmm0 +- movdqu -16(%rsi, %rdx), %xmm1 +- CMPEQ %xmm0, %xmm1 +- pmovmskb %xmm1, %eax +- incw %ax +- jnz L(vec_return_end) +- ret +- +- .p2align 4 +-L(16_to_32_bytes): +- movdqu (%rdi), %xmm0 +- movdqu (%rsi), %xmm1 +- CMPEQ %xmm0, %xmm1 +- pmovmskb %xmm1, %eax +- incw %ax +- jnz L(vec_return_begin) +- +- movdqu -16(%rdi, %rdx), %xmm0 +- movdqu -16(%rsi, %rdx), %xmm1 +- CMPEQ %xmm0, %xmm1 +- pmovmskb %xmm1, %eax +- incw %ax +- jnz L(vec_return_end) +- ret +- +- +- .p2align 4 +-L(79bytesormore): +- movdqu (%rdi), %xmm0 +- movdqu (%rsi), %xmm1 +- CMPEQ %xmm0, %xmm1 +- pmovmskb %xmm1, %eax +- incw %ax +- jnz L(vec_return_begin) +- +- +- mov %rsi, %rcx +- and $-16, %rsi +- add $16, %rsi +- sub %rsi, %rcx +- +- sub %rcx, %rdi +- add %rcx, %rdx +- test $0xf, %rdi +- jz L(2aligned) +- +- cmp $128, %rdx +- ja L(128bytesormore) +- +- .p2align 4,, 6 +-L(less128bytes): +- movdqu (%rdi), %xmm1 +- CMPEQ (%rsi), %xmm1 +- pmovmskb %xmm1, %eax +- incw %ax +- jnz L(vec_return_begin) +- +- movdqu 16(%rdi), %xmm1 +- CMPEQ 16(%rsi), %xmm1 +- pmovmskb %xmm1, %eax +- incw %ax +- jnz L(vec_return_begin_16) +- +- movdqu 32(%rdi), %xmm1 +- CMPEQ 32(%rsi), %xmm1 +- pmovmskb %xmm1, %eax +- incw %ax +- jnz L(vec_return_begin_32) +- +- movdqu 48(%rdi), %xmm1 +- CMPEQ 48(%rsi), %xmm1 +- pmovmskb %xmm1, %eax +- incw %ax +- jnz L(vec_return_begin_48) +- +- cmp $96, %rdx +- jb L(32_to_64_bytes) +- +- addq $64, %rdi +- addq $64, %rsi +- subq $64, %rdx +- +- .p2align 4,, 6 +-L(last_64_bytes): +- movdqu (%rdi), %xmm1 +- CMPEQ (%rsi), %xmm1 +- pmovmskb %xmm1, %eax +- incw %ax +- jnz L(vec_return_begin) +- +- movdqu 16(%rdi), %xmm1 +- CMPEQ 16(%rsi), %xmm1 +- pmovmskb %xmm1, %eax +- incw %ax +- jnz L(vec_return_begin_16) +- +- movdqu -32(%rdi, %rdx), %xmm0 +- movdqu -32(%rsi, %rdx), %xmm1 +- CMPEQ %xmm0, %xmm1 +- pmovmskb %xmm1, %eax +- incw %ax +- jnz L(vec_return_end_16) +- +- movdqu -16(%rdi, %rdx), %xmm0 +- movdqu -16(%rsi, %rdx), %xmm1 +- CMPEQ %xmm0, %xmm1 +- pmovmskb %xmm1, %eax +- incw %ax +- jnz L(vec_return_end) +- ret +- +- .p2align 4 +-L(128bytesormore): +- cmp $256, %rdx +- ja L(unaligned_loop) +-L(less256bytes): +- movdqu (%rdi), %xmm1 +- CMPEQ (%rsi), %xmm1 +- pmovmskb %xmm1, %eax +- incw %ax +- jnz L(vec_return_begin) +- +- movdqu 16(%rdi), %xmm1 +- CMPEQ 16(%rsi), %xmm1 +- pmovmskb %xmm1, %eax +- incw %ax +- jnz L(vec_return_begin_16) +- +- movdqu 32(%rdi), %xmm1 +- CMPEQ 32(%rsi), %xmm1 +- pmovmskb %xmm1, %eax +- incw %ax +- jnz L(vec_return_begin_32) +- +- movdqu 48(%rdi), %xmm1 +- CMPEQ 48(%rsi), %xmm1 +- pmovmskb 
%xmm1, %eax +- incw %ax +- jnz L(vec_return_begin_48) +- +- addq $64, %rdi +- addq $64, %rsi +- +- movdqu (%rdi), %xmm1 +- CMPEQ (%rsi), %xmm1 +- pmovmskb %xmm1, %eax +- incw %ax +- jnz L(vec_return_begin) +- +- movdqu 16(%rdi), %xmm1 +- CMPEQ 16(%rsi), %xmm1 +- pmovmskb %xmm1, %eax +- incw %ax +- jnz L(vec_return_begin_16) +- +- movdqu 32(%rdi), %xmm1 +- CMPEQ 32(%rsi), %xmm1 +- pmovmskb %xmm1, %eax +- incw %ax +- jnz L(vec_return_begin_32) +- +- movdqu 48(%rdi), %xmm1 +- CMPEQ 48(%rsi), %xmm1 +- pmovmskb %xmm1, %eax +- incw %ax +- jnz L(vec_return_begin_48) +- +- addq $-128, %rdx +- subq $-64, %rsi +- subq $-64, %rdi +- +- cmp $64, %rdx +- ja L(less128bytes) +- +- cmp $32, %rdx +- ja L(last_64_bytes) +- +- movdqu -32(%rdi, %rdx), %xmm0 +- movdqu -32(%rsi, %rdx), %xmm1 +- CMPEQ %xmm0, %xmm1 +- pmovmskb %xmm1, %eax +- incw %ax +- jnz L(vec_return_end_16) +- +- movdqu -16(%rdi, %rdx), %xmm0 +- movdqu -16(%rsi, %rdx), %xmm1 +- CMPEQ %xmm0, %xmm1 +- pmovmskb %xmm1, %eax +- incw %ax +- jnz L(vec_return_end) +- ret +- +- .p2align 4 +-L(unaligned_loop): +-# ifdef DATA_CACHE_SIZE_HALF +- mov $DATA_CACHE_SIZE_HALF, %R8_LP +-# else +- mov __x86_data_cache_size_half(%rip), %R8_LP +-# endif +- movq %r8, %r9 +- addq %r8, %r8 +- addq %r9, %r8 +- cmpq %r8, %rdx +- ja L(L2_L3_cache_unaligned) +- sub $64, %rdx +- .p2align 4 +-L(64bytesormore_loop): +- movdqu (%rdi), %xmm0 +- movdqu 16(%rdi), %xmm1 +- movdqu 32(%rdi), %xmm2 +- movdqu 48(%rdi), %xmm3 +- +- CMPEQ (%rsi), %xmm0 +- CMPEQ 16(%rsi), %xmm1 +- CMPEQ 32(%rsi), %xmm2 +- CMPEQ 48(%rsi), %xmm3 +- +- pand %xmm0, %xmm1 +- pand %xmm2, %xmm3 +- pand %xmm1, %xmm3 +- +- pmovmskb %xmm3, %eax +- incw %ax +- jnz L(64bytesormore_loop_end) +- +- add $64, %rsi +- add $64, %rdi +- sub $64, %rdx +- ja L(64bytesormore_loop) +- +- .p2align 4,, 6 +-L(loop_tail): +- addq %rdx, %rdi +- movdqu (%rdi), %xmm0 +- movdqu 16(%rdi), %xmm1 +- movdqu 32(%rdi), %xmm2 +- movdqu 48(%rdi), %xmm3 +- +- addq %rdx, %rsi +- movdqu (%rsi), %xmm4 +- movdqu 16(%rsi), %xmm5 +- movdqu 32(%rsi), %xmm6 +- movdqu 48(%rsi), %xmm7 +- +- CMPEQ %xmm4, %xmm0 +- CMPEQ %xmm5, %xmm1 +- CMPEQ %xmm6, %xmm2 +- CMPEQ %xmm7, %xmm3 +- +- pand %xmm0, %xmm1 +- pand %xmm2, %xmm3 +- pand %xmm1, %xmm3 +- +- pmovmskb %xmm3, %eax +- incw %ax +- jnz L(64bytesormore_loop_end) +- ret +- +-L(L2_L3_cache_unaligned): +- subq $64, %rdx +- .p2align 4 +-L(L2_L3_unaligned_128bytes_loop): +- prefetchnta 0x1c0(%rdi) +- prefetchnta 0x1c0(%rsi) +- +- movdqu (%rdi), %xmm0 +- movdqu 16(%rdi), %xmm1 +- movdqu 32(%rdi), %xmm2 +- movdqu 48(%rdi), %xmm3 +- +- CMPEQ (%rsi), %xmm0 +- CMPEQ 16(%rsi), %xmm1 +- CMPEQ 32(%rsi), %xmm2 +- CMPEQ 48(%rsi), %xmm3 +- +- pand %xmm0, %xmm1 +- pand %xmm2, %xmm3 +- pand %xmm1, %xmm3 +- +- pmovmskb %xmm3, %eax +- incw %ax +- jnz L(64bytesormore_loop_end) +- +- add $64, %rsi +- add $64, %rdi +- sub $64, %rdx +- ja L(L2_L3_unaligned_128bytes_loop) +- jmp L(loop_tail) +- +- +- /* This case is for machines which are sensitive for unaligned +- * instructions. 
*/ +- .p2align 4 +-L(2aligned): +- cmp $128, %rdx +- ja L(128bytesormorein2aligned) +-L(less128bytesin2aligned): +- movdqa (%rdi), %xmm1 +- CMPEQ (%rsi), %xmm1 +- pmovmskb %xmm1, %eax +- incw %ax +- jnz L(vec_return_begin) +- +- movdqa 16(%rdi), %xmm1 +- CMPEQ 16(%rsi), %xmm1 +- pmovmskb %xmm1, %eax +- incw %ax +- jnz L(vec_return_begin_16) +- +- movdqa 32(%rdi), %xmm1 +- CMPEQ 32(%rsi), %xmm1 +- pmovmskb %xmm1, %eax +- incw %ax +- jnz L(vec_return_begin_32) +- +- movdqa 48(%rdi), %xmm1 +- CMPEQ 48(%rsi), %xmm1 +- pmovmskb %xmm1, %eax +- incw %ax +- jnz L(vec_return_begin_48) +- +- cmp $96, %rdx +- jb L(32_to_64_bytes) +- +- addq $64, %rdi +- addq $64, %rsi +- subq $64, %rdx +- +- .p2align 4,, 6 +-L(aligned_last_64_bytes): +- movdqa (%rdi), %xmm1 +- CMPEQ (%rsi), %xmm1 +- pmovmskb %xmm1, %eax +- incw %ax +- jnz L(vec_return_begin) +- +- movdqa 16(%rdi), %xmm1 +- CMPEQ 16(%rsi), %xmm1 +- pmovmskb %xmm1, %eax +- incw %ax +- jnz L(vec_return_begin_16) +- +- movdqu -32(%rdi, %rdx), %xmm0 +- movdqu -32(%rsi, %rdx), %xmm1 +- CMPEQ %xmm0, %xmm1 +- pmovmskb %xmm1, %eax +- incw %ax +- jnz L(vec_return_end_16) +- +- movdqu -16(%rdi, %rdx), %xmm0 +- movdqu -16(%rsi, %rdx), %xmm1 +- CMPEQ %xmm0, %xmm1 +- pmovmskb %xmm1, %eax +- incw %ax +- jnz L(vec_return_end) +- ret +- +- .p2align 4 +-L(128bytesormorein2aligned): +- cmp $256, %rdx +- ja L(aligned_loop) +-L(less256bytesin2alinged): +- movdqa (%rdi), %xmm1 +- CMPEQ (%rsi), %xmm1 +- pmovmskb %xmm1, %eax +- incw %ax +- jnz L(vec_return_begin) +- +- movdqa 16(%rdi), %xmm1 +- CMPEQ 16(%rsi), %xmm1 +- pmovmskb %xmm1, %eax +- incw %ax +- jnz L(vec_return_begin_16) +- +- movdqa 32(%rdi), %xmm1 +- CMPEQ 32(%rsi), %xmm1 +- pmovmskb %xmm1, %eax +- incw %ax +- jnz L(vec_return_begin_32) +- +- movdqa 48(%rdi), %xmm1 +- CMPEQ 48(%rsi), %xmm1 +- pmovmskb %xmm1, %eax +- incw %ax +- jnz L(vec_return_begin_48) +- +- addq $64, %rdi +- addq $64, %rsi +- +- movdqa (%rdi), %xmm1 +- CMPEQ (%rsi), %xmm1 +- pmovmskb %xmm1, %eax +- incw %ax +- jnz L(vec_return_begin) +- +- movdqa 16(%rdi), %xmm1 +- CMPEQ 16(%rsi), %xmm1 +- pmovmskb %xmm1, %eax +- incw %ax +- jnz L(vec_return_begin_16) +- +- movdqa 32(%rdi), %xmm1 +- CMPEQ 32(%rsi), %xmm1 +- pmovmskb %xmm1, %eax +- incw %ax +- jnz L(vec_return_begin_32) +- +- movdqa 48(%rdi), %xmm1 +- CMPEQ 48(%rsi), %xmm1 +- pmovmskb %xmm1, %eax +- incw %ax +- jnz L(vec_return_begin_48) +- +- addq $-128, %rdx +- subq $-64, %rsi +- subq $-64, %rdi +- +- cmp $64, %rdx +- ja L(less128bytesin2aligned) +- +- cmp $32, %rdx +- ja L(aligned_last_64_bytes) +- +- movdqu -32(%rdi, %rdx), %xmm0 +- movdqu -32(%rsi, %rdx), %xmm1 +- CMPEQ %xmm0, %xmm1 +- pmovmskb %xmm1, %eax +- incw %ax +- jnz L(vec_return_end_16) +- +- movdqu -16(%rdi, %rdx), %xmm0 +- movdqu -16(%rsi, %rdx), %xmm1 +- CMPEQ %xmm0, %xmm1 +- pmovmskb %xmm1, %eax +- incw %ax +- jnz L(vec_return_end) +- ret +- +- .p2align 4 +-L(aligned_loop): +-# ifdef DATA_CACHE_SIZE_HALF +- mov $DATA_CACHE_SIZE_HALF, %R8_LP +-# else +- mov __x86_data_cache_size_half(%rip), %R8_LP +-# endif +- movq %r8, %r9 +- addq %r8, %r8 +- addq %r9, %r8 +- cmpq %r8, %rdx +- ja L(L2_L3_cache_aligned) +- +- sub $64, %rdx +- .p2align 4 +-L(64bytesormore_loopin2aligned): +- movdqa (%rdi), %xmm0 +- movdqa 16(%rdi), %xmm1 +- movdqa 32(%rdi), %xmm2 +- movdqa 48(%rdi), %xmm3 +- +- CMPEQ (%rsi), %xmm0 +- CMPEQ 16(%rsi), %xmm1 +- CMPEQ 32(%rsi), %xmm2 +- CMPEQ 48(%rsi), %xmm3 +- +- pand %xmm0, %xmm1 +- pand %xmm2, %xmm3 +- pand %xmm1, %xmm3 +- +- pmovmskb %xmm3, %eax +- incw %ax +- jnz L(64bytesormore_loop_end) +- add $64, %rsi +- 
add $64, %rdi +- sub $64, %rdx +- ja L(64bytesormore_loopin2aligned) +- jmp L(loop_tail) +- +-L(L2_L3_cache_aligned): +- subq $64, %rdx +- .p2align 4 +-L(L2_L3_aligned_128bytes_loop): +- prefetchnta 0x1c0(%rdi) +- prefetchnta 0x1c0(%rsi) +- movdqa (%rdi), %xmm0 +- movdqa 16(%rdi), %xmm1 +- movdqa 32(%rdi), %xmm2 +- movdqa 48(%rdi), %xmm3 +- +- CMPEQ (%rsi), %xmm0 +- CMPEQ 16(%rsi), %xmm1 +- CMPEQ 32(%rsi), %xmm2 +- CMPEQ 48(%rsi), %xmm3 +- +- pand %xmm0, %xmm1 +- pand %xmm2, %xmm3 +- pand %xmm1, %xmm3 +- +- pmovmskb %xmm3, %eax +- incw %ax +- jnz L(64bytesormore_loop_end) +- +- addq $64, %rsi +- addq $64, %rdi +- subq $64, %rdx +- ja L(L2_L3_aligned_128bytes_loop) +- jmp L(loop_tail) +- +- .p2align 4 +-L(64bytesormore_loop_end): +- pmovmskb %xmm0, %ecx +- incw %cx +- jnz L(loop_end_ret) +- +- pmovmskb %xmm1, %ecx +- notw %cx +- sall $16, %ecx +- jnz L(loop_end_ret) +- +- pmovmskb %xmm2, %ecx +- notw %cx +- shlq $32, %rcx +- jnz L(loop_end_ret) +- +- addq $48, %rdi +- addq $48, %rsi +- movq %rax, %rcx +- +- .p2align 4,, 6 +-L(loop_end_ret): +- bsfq %rcx, %rcx +-# ifdef USE_AS_WMEMCMP +- movl (%rdi, %rcx), %eax +- xorl %edx, %edx +- cmpl (%rsi, %rcx), %eax +- setg %dl +- leal -1(%rdx, %rdx), %eax +-# else +- movzbl (%rdi, %rcx), %eax +- movzbl (%rsi, %rcx), %ecx +- subl %ecx, %eax +-# endif +- ret +-END (MEMCMP) +-#endif diff --git a/SOURCES/glibc-upstream-2.34-232.patch b/SOURCES/glibc-upstream-2.34-232.patch new file mode 100644 index 0000000..03ca852 --- /dev/null +++ b/SOURCES/glibc-upstream-2.34-232.patch @@ -0,0 +1,259 @@ +commit df5de87260dba479873b2850bbe5c0b81c2376f6 +Author: Noah Goldstein +Date: Fri Apr 15 12:28:01 2022 -0500 + + x86: Cleanup page cross code in memcmp-avx2-movbe.S + + Old code was both inefficient and wasted code size. New code (-62 + bytes) and comparable or better performance in the page cross case. 
+ + geometric_mean(N=20) of page cross cases New / Original: 0.960 + + size, align0, align1, ret, New Time/Old Time + 1, 4095, 0, 0, 1.001 + 1, 4095, 0, 1, 0.999 + 1, 4095, 0, -1, 1.0 + 2, 4094, 0, 0, 1.0 + 2, 4094, 0, 1, 1.0 + 2, 4094, 0, -1, 1.0 + 3, 4093, 0, 0, 1.0 + 3, 4093, 0, 1, 1.0 + 3, 4093, 0, -1, 1.0 + 4, 4092, 0, 0, 0.987 + 4, 4092, 0, 1, 1.0 + 4, 4092, 0, -1, 1.0 + 5, 4091, 0, 0, 0.984 + 5, 4091, 0, 1, 1.002 + 5, 4091, 0, -1, 1.005 + 6, 4090, 0, 0, 0.993 + 6, 4090, 0, 1, 1.001 + 6, 4090, 0, -1, 1.003 + 7, 4089, 0, 0, 0.991 + 7, 4089, 0, 1, 1.0 + 7, 4089, 0, -1, 1.001 + 8, 4088, 0, 0, 0.875 + 8, 4088, 0, 1, 0.881 + 8, 4088, 0, -1, 0.888 + 9, 4087, 0, 0, 0.872 + 9, 4087, 0, 1, 0.879 + 9, 4087, 0, -1, 0.883 + 10, 4086, 0, 0, 0.878 + 10, 4086, 0, 1, 0.886 + 10, 4086, 0, -1, 0.873 + 11, 4085, 0, 0, 0.878 + 11, 4085, 0, 1, 0.881 + 11, 4085, 0, -1, 0.879 + 12, 4084, 0, 0, 0.873 + 12, 4084, 0, 1, 0.889 + 12, 4084, 0, -1, 0.875 + 13, 4083, 0, 0, 0.873 + 13, 4083, 0, 1, 0.863 + 13, 4083, 0, -1, 0.863 + 14, 4082, 0, 0, 0.838 + 14, 4082, 0, 1, 0.869 + 14, 4082, 0, -1, 0.877 + 15, 4081, 0, 0, 0.841 + 15, 4081, 0, 1, 0.869 + 15, 4081, 0, -1, 0.876 + 16, 4080, 0, 0, 0.988 + 16, 4080, 0, 1, 0.99 + 16, 4080, 0, -1, 0.989 + 17, 4079, 0, 0, 0.978 + 17, 4079, 0, 1, 0.981 + 17, 4079, 0, -1, 0.98 + 18, 4078, 0, 0, 0.981 + 18, 4078, 0, 1, 0.98 + 18, 4078, 0, -1, 0.985 + 19, 4077, 0, 0, 0.977 + 19, 4077, 0, 1, 0.979 + 19, 4077, 0, -1, 0.986 + 20, 4076, 0, 0, 0.977 + 20, 4076, 0, 1, 0.986 + 20, 4076, 0, -1, 0.984 + 21, 4075, 0, 0, 0.977 + 21, 4075, 0, 1, 0.983 + 21, 4075, 0, -1, 0.988 + 22, 4074, 0, 0, 0.983 + 22, 4074, 0, 1, 0.994 + 22, 4074, 0, -1, 0.993 + 23, 4073, 0, 0, 0.98 + 23, 4073, 0, 1, 0.992 + 23, 4073, 0, -1, 0.995 + 24, 4072, 0, 0, 0.989 + 24, 4072, 0, 1, 0.989 + 24, 4072, 0, -1, 0.991 + 25, 4071, 0, 0, 0.99 + 25, 4071, 0, 1, 0.999 + 25, 4071, 0, -1, 0.996 + 26, 4070, 0, 0, 0.993 + 26, 4070, 0, 1, 0.995 + 26, 4070, 0, -1, 0.998 + 27, 4069, 0, 0, 0.993 + 27, 4069, 0, 1, 0.999 + 27, 4069, 0, -1, 1.0 + 28, 4068, 0, 0, 0.997 + 28, 4068, 0, 1, 1.0 + 28, 4068, 0, -1, 0.999 + 29, 4067, 0, 0, 0.996 + 29, 4067, 0, 1, 0.999 + 29, 4067, 0, -1, 0.999 + 30, 4066, 0, 0, 0.991 + 30, 4066, 0, 1, 1.001 + 30, 4066, 0, -1, 0.999 + 31, 4065, 0, 0, 0.988 + 31, 4065, 0, 1, 0.998 + 31, 4065, 0, -1, 0.998 + Reviewed-by: H.J. Lu + + (cherry picked from commit 23102686ec67b856a2d4fd25ddaa1c0b8d175c4f) + +diff --git a/sysdeps/x86_64/multiarch/memcmp-avx2-movbe.S b/sysdeps/x86_64/multiarch/memcmp-avx2-movbe.S +index 2621ec907aedb781..ec9cf0852edf216d 100644 +--- a/sysdeps/x86_64/multiarch/memcmp-avx2-movbe.S ++++ b/sysdeps/x86_64/multiarch/memcmp-avx2-movbe.S +@@ -429,22 +429,21 @@ L(page_cross_less_vec): + # ifndef USE_AS_WMEMCMP + cmpl $8, %edx + jae L(between_8_15) ++ /* Fall through for [4, 7]. */ + cmpl $4, %edx +- jae L(between_4_7) ++ jb L(between_2_3) + +- /* Load as big endian to avoid branches. */ +- movzwl (%rdi), %eax +- movzwl (%rsi), %ecx +- shll $8, %eax +- shll $8, %ecx +- bswap %eax +- bswap %ecx +- movzbl -1(%rdi, %rdx), %edi +- movzbl -1(%rsi, %rdx), %esi +- orl %edi, %eax +- orl %esi, %ecx +- /* Subtraction is okay because the upper 8 bits are zero. */ +- subl %ecx, %eax ++ movbe (%rdi), %eax ++ movbe (%rsi), %ecx ++ shlq $32, %rax ++ shlq $32, %rcx ++ movbe -4(%rdi, %rdx), %edi ++ movbe -4(%rsi, %rdx), %esi ++ orq %rdi, %rax ++ orq %rsi, %rcx ++ subq %rcx, %rax ++ /* Fast path for return zero. */ ++ jnz L(ret_nonzero) + /* No ymm register was touched. 
*/ + ret + +@@ -457,9 +456,33 @@ L(one_or_less): + /* No ymm register was touched. */ + ret + ++ .p2align 4,, 5 ++L(ret_nonzero): ++ sbbl %eax, %eax ++ orl $1, %eax ++ /* No ymm register was touched. */ ++ ret ++ ++ .p2align 4,, 2 ++L(zero): ++ xorl %eax, %eax ++ /* No ymm register was touched. */ ++ ret ++ + .p2align 4 + L(between_8_15): +-# endif ++ movbe (%rdi), %rax ++ movbe (%rsi), %rcx ++ subq %rcx, %rax ++ jnz L(ret_nonzero) ++ movbe -8(%rdi, %rdx), %rax ++ movbe -8(%rsi, %rdx), %rcx ++ subq %rcx, %rax ++ /* Fast path for return zero. */ ++ jnz L(ret_nonzero) ++ /* No ymm register was touched. */ ++ ret ++# else + /* If USE_AS_WMEMCMP fall through into 8-15 byte case. */ + vmovq (%rdi), %xmm1 + vmovq (%rsi), %xmm2 +@@ -475,16 +498,13 @@ L(between_8_15): + VPCMPEQ %xmm1, %xmm2, %xmm2 + vpmovmskb %xmm2, %eax + subl $0xffff, %eax ++ /* Fast path for return zero. */ + jnz L(return_vec_0) + /* No ymm register was touched. */ + ret ++# endif + +- .p2align 4 +-L(zero): +- xorl %eax, %eax +- ret +- +- .p2align 4 ++ .p2align 4,, 10 + L(between_16_31): + /* From 16 to 31 bytes. No branch when size == 16. */ + vmovdqu (%rsi), %xmm2 +@@ -501,11 +521,17 @@ L(between_16_31): + VPCMPEQ (%rdi), %xmm2, %xmm2 + vpmovmskb %xmm2, %eax + subl $0xffff, %eax ++ /* Fast path for return zero. */ + jnz L(return_vec_0) + /* No ymm register was touched. */ + ret + + # ifdef USE_AS_WMEMCMP ++ .p2align 4,, 2 ++L(zero): ++ xorl %eax, %eax ++ ret ++ + .p2align 4 + L(one_or_less): + jb L(zero) +@@ -520,22 +546,20 @@ L(one_or_less): + # else + + .p2align 4 +-L(between_4_7): +- /* Load as big endian with overlapping movbe to avoid branches. +- */ +- movbe (%rdi), %eax +- movbe (%rsi), %ecx +- shlq $32, %rax +- shlq $32, %rcx +- movbe -4(%rdi, %rdx), %edi +- movbe -4(%rsi, %rdx), %esi +- orq %rdi, %rax +- orq %rsi, %rcx +- subq %rcx, %rax +- jz L(zero_4_7) +- sbbl %eax, %eax +- orl $1, %eax +-L(zero_4_7): ++L(between_2_3): ++ /* Load as big endian to avoid branches. */ ++ movzwl (%rdi), %eax ++ movzwl (%rsi), %ecx ++ bswap %eax ++ bswap %ecx ++ shrl %eax ++ shrl %ecx ++ movzbl -1(%rdi, %rdx), %edi ++ movzbl -1(%rsi, %rdx), %esi ++ orl %edi, %eax ++ orl %esi, %ecx ++ /* Subtraction is okay because the upper bit is zero. */ ++ subl %ecx, %eax + /* No ymm register was touched. */ + ret + # endif diff --git a/SOURCES/glibc-upstream-2.34-233.patch b/SOURCES/glibc-upstream-2.34-233.patch new file mode 100644 index 0000000..30c79de --- /dev/null +++ b/SOURCES/glibc-upstream-2.34-233.patch @@ -0,0 +1,865 @@ +commit 0a11305416e287d85c64f04337cfd64b6b350e0c +Author: Noah Goldstein +Date: Thu Apr 21 20:52:28 2022 -0500 + + x86: Optimize {str|wcs}rchr-sse2 + + The new code unrolls the main loop slightly without adding too much + overhead and minimizes the comparisons for the search CHAR. + + Geometric Mean of all benchmarks New / Old: 0.741 + See email for all results. + + Full xcheck passes on x86_64 with and without multiarch enabled. + Reviewed-by: H.J. Lu + + (cherry picked from commit 5307aa9c1800f36a64c183c091c9af392c1fa75c) + +diff --git a/sysdeps/x86_64/multiarch/strrchr-sse2.S b/sysdeps/x86_64/multiarch/strrchr-sse2.S +index 67c30d0260cef8a3..a56300bc1830dedd 100644 +--- a/sysdeps/x86_64/multiarch/strrchr-sse2.S ++++ b/sysdeps/x86_64/multiarch/strrchr-sse2.S +@@ -17,7 +17,7 @@ + . 
*/ + + #if IS_IN (libc) +-# define strrchr __strrchr_sse2 ++# define STRRCHR __strrchr_sse2 + + # undef weak_alias + # define weak_alias(strrchr, rindex) +diff --git a/sysdeps/x86_64/multiarch/wcsrchr-sse2.S b/sysdeps/x86_64/multiarch/wcsrchr-sse2.S +index a36034b40afe8d3d..00f69f2be77a43a0 100644 +--- a/sysdeps/x86_64/multiarch/wcsrchr-sse2.S ++++ b/sysdeps/x86_64/multiarch/wcsrchr-sse2.S +@@ -17,7 +17,6 @@ + . */ + + #if IS_IN (libc) +-# define wcsrchr __wcsrchr_sse2 ++# define STRRCHR __wcsrchr_sse2 + #endif +- + #include "../wcsrchr.S" +diff --git a/sysdeps/x86_64/strrchr.S b/sysdeps/x86_64/strrchr.S +index dfd09fe9508cb5bc..fc1598bb11417fd5 100644 +--- a/sysdeps/x86_64/strrchr.S ++++ b/sysdeps/x86_64/strrchr.S +@@ -19,210 +19,360 @@ + + #include + ++#ifndef STRRCHR ++# define STRRCHR strrchr ++#endif ++ ++#ifdef USE_AS_WCSRCHR ++# define PCMPEQ pcmpeqd ++# define CHAR_SIZE 4 ++# define PMINU pminud ++#else ++# define PCMPEQ pcmpeqb ++# define CHAR_SIZE 1 ++# define PMINU pminub ++#endif ++ ++#define PAGE_SIZE 4096 ++#define VEC_SIZE 16 ++ + .text +-ENTRY (strrchr) +- movd %esi, %xmm1 ++ENTRY(STRRCHR) ++ movd %esi, %xmm0 + movq %rdi, %rax +- andl $4095, %eax +- punpcklbw %xmm1, %xmm1 +- cmpq $4032, %rax +- punpcklwd %xmm1, %xmm1 +- pshufd $0, %xmm1, %xmm1 ++ andl $(PAGE_SIZE - 1), %eax ++#ifndef USE_AS_WCSRCHR ++ punpcklbw %xmm0, %xmm0 ++ punpcklwd %xmm0, %xmm0 ++#endif ++ pshufd $0, %xmm0, %xmm0 ++ cmpl $(PAGE_SIZE - VEC_SIZE), %eax + ja L(cross_page) +- movdqu (%rdi), %xmm0 ++ ++L(cross_page_continue): ++ movups (%rdi), %xmm1 + pxor %xmm2, %xmm2 +- movdqa %xmm0, %xmm3 +- pcmpeqb %xmm1, %xmm0 +- pcmpeqb %xmm2, %xmm3 +- pmovmskb %xmm0, %ecx +- pmovmskb %xmm3, %edx +- testq %rdx, %rdx +- je L(next_48_bytes) +- leaq -1(%rdx), %rax +- xorq %rdx, %rax +- andq %rcx, %rax +- je L(exit) +- bsrq %rax, %rax ++ PCMPEQ %xmm1, %xmm2 ++ pmovmskb %xmm2, %ecx ++ testl %ecx, %ecx ++ jz L(aligned_more) ++ ++ PCMPEQ %xmm0, %xmm1 ++ pmovmskb %xmm1, %eax ++ leal -1(%rcx), %edx ++ xorl %edx, %ecx ++ andl %ecx, %eax ++ jz L(ret0) ++ bsrl %eax, %eax + addq %rdi, %rax ++ /* We are off by 3 for wcsrchr if search CHAR is non-zero. If ++ search CHAR is zero we are correct. Either way `andq ++ -CHAR_SIZE, %rax` gets the correct result. */ ++#ifdef USE_AS_WCSRCHR ++ andq $-CHAR_SIZE, %rax ++#endif ++L(ret0): + ret + ++ /* Returns for first vec x1/x2 have hard coded backward search ++ path for earlier matches. 
*/ + .p2align 4 +-L(next_48_bytes): +- movdqu 16(%rdi), %xmm4 +- movdqa %xmm4, %xmm5 +- movdqu 32(%rdi), %xmm3 +- pcmpeqb %xmm1, %xmm4 +- pcmpeqb %xmm2, %xmm5 +- movdqu 48(%rdi), %xmm0 +- pmovmskb %xmm5, %edx +- movdqa %xmm3, %xmm5 +- pcmpeqb %xmm1, %xmm3 +- pcmpeqb %xmm2, %xmm5 +- pcmpeqb %xmm0, %xmm2 +- salq $16, %rdx +- pmovmskb %xmm3, %r8d +- pmovmskb %xmm5, %eax +- pmovmskb %xmm2, %esi +- salq $32, %r8 +- salq $32, %rax +- pcmpeqb %xmm1, %xmm0 +- orq %rdx, %rax +- movq %rsi, %rdx +- pmovmskb %xmm4, %esi +- salq $48, %rdx +- salq $16, %rsi +- orq %r8, %rsi +- orq %rcx, %rsi +- pmovmskb %xmm0, %ecx +- salq $48, %rcx +- orq %rcx, %rsi +- orq %rdx, %rax +- je L(loop_header2) +- leaq -1(%rax), %rcx +- xorq %rax, %rcx +- andq %rcx, %rsi +- je L(exit) +- bsrq %rsi, %rsi +- leaq (%rdi,%rsi), %rax ++L(first_vec_x0_test): ++ PCMPEQ %xmm0, %xmm1 ++ pmovmskb %xmm1, %eax ++ testl %eax, %eax ++ jz L(ret0) ++ bsrl %eax, %eax ++ addq %r8, %rax ++#ifdef USE_AS_WCSRCHR ++ andq $-CHAR_SIZE, %rax ++#endif + ret + + .p2align 4 +-L(loop_header2): +- testq %rsi, %rsi +- movq %rdi, %rcx +- je L(no_c_found) +-L(loop_header): +- addq $64, %rdi +- pxor %xmm7, %xmm7 +- andq $-64, %rdi +- jmp L(loop_entry) ++L(first_vec_x1): ++ PCMPEQ %xmm0, %xmm2 ++ pmovmskb %xmm2, %eax ++ leal -1(%rcx), %edx ++ xorl %edx, %ecx ++ andl %ecx, %eax ++ jz L(first_vec_x0_test) ++ bsrl %eax, %eax ++ leaq (VEC_SIZE)(%rdi, %rax), %rax ++#ifdef USE_AS_WCSRCHR ++ andq $-CHAR_SIZE, %rax ++#endif ++ ret + + .p2align 4 +-L(loop64): +- testq %rdx, %rdx +- cmovne %rdx, %rsi +- cmovne %rdi, %rcx +- addq $64, %rdi +-L(loop_entry): +- movdqa 32(%rdi), %xmm3 +- pxor %xmm6, %xmm6 +- movdqa 48(%rdi), %xmm2 +- movdqa %xmm3, %xmm0 +- movdqa 16(%rdi), %xmm4 +- pminub %xmm2, %xmm0 +- movdqa (%rdi), %xmm5 +- pminub %xmm4, %xmm0 +- pminub %xmm5, %xmm0 +- pcmpeqb %xmm7, %xmm0 +- pmovmskb %xmm0, %eax +- movdqa %xmm5, %xmm0 +- pcmpeqb %xmm1, %xmm0 +- pmovmskb %xmm0, %r9d +- movdqa %xmm4, %xmm0 +- pcmpeqb %xmm1, %xmm0 +- pmovmskb %xmm0, %edx +- movdqa %xmm3, %xmm0 +- pcmpeqb %xmm1, %xmm0 +- salq $16, %rdx +- pmovmskb %xmm0, %r10d +- movdqa %xmm2, %xmm0 +- pcmpeqb %xmm1, %xmm0 +- salq $32, %r10 +- orq %r10, %rdx +- pmovmskb %xmm0, %r8d +- orq %r9, %rdx +- salq $48, %r8 +- orq %r8, %rdx ++L(first_vec_x1_test): ++ PCMPEQ %xmm0, %xmm2 ++ pmovmskb %xmm2, %eax + testl %eax, %eax +- je L(loop64) +- pcmpeqb %xmm6, %xmm4 +- pcmpeqb %xmm6, %xmm3 +- pcmpeqb %xmm6, %xmm5 +- pmovmskb %xmm4, %eax +- pmovmskb %xmm3, %r10d +- pcmpeqb %xmm6, %xmm2 +- pmovmskb %xmm5, %r9d +- salq $32, %r10 +- salq $16, %rax +- pmovmskb %xmm2, %r8d +- orq %r10, %rax +- orq %r9, %rax +- salq $48, %r8 +- orq %r8, %rax +- leaq -1(%rax), %r8 +- xorq %rax, %r8 +- andq %r8, %rdx +- cmovne %rdi, %rcx +- cmovne %rdx, %rsi +- bsrq %rsi, %rsi +- leaq (%rcx,%rsi), %rax ++ jz L(first_vec_x0_test) ++ bsrl %eax, %eax ++ leaq (VEC_SIZE)(%rdi, %rax), %rax ++#ifdef USE_AS_WCSRCHR ++ andq $-CHAR_SIZE, %rax ++#endif ++ ret ++ ++ .p2align 4 ++L(first_vec_x2): ++ PCMPEQ %xmm0, %xmm3 ++ pmovmskb %xmm3, %eax ++ leal -1(%rcx), %edx ++ xorl %edx, %ecx ++ andl %ecx, %eax ++ jz L(first_vec_x1_test) ++ bsrl %eax, %eax ++ leaq (VEC_SIZE * 2)(%rdi, %rax), %rax ++#ifdef USE_AS_WCSRCHR ++ andq $-CHAR_SIZE, %rax ++#endif ++ ret ++ ++ .p2align 4 ++L(aligned_more): ++ /* Save original pointer if match was in VEC 0. 
*/ ++ movq %rdi, %r8 ++ andq $-VEC_SIZE, %rdi ++ ++ movaps VEC_SIZE(%rdi), %xmm2 ++ pxor %xmm3, %xmm3 ++ PCMPEQ %xmm2, %xmm3 ++ pmovmskb %xmm3, %ecx ++ testl %ecx, %ecx ++ jnz L(first_vec_x1) ++ ++ movaps (VEC_SIZE * 2)(%rdi), %xmm3 ++ pxor %xmm4, %xmm4 ++ PCMPEQ %xmm3, %xmm4 ++ pmovmskb %xmm4, %ecx ++ testl %ecx, %ecx ++ jnz L(first_vec_x2) ++ ++ addq $VEC_SIZE, %rdi ++ /* Save pointer again before realigning. */ ++ movq %rdi, %rsi ++ andq $-(VEC_SIZE * 2), %rdi ++ .p2align 4 ++L(first_loop): ++ /* Do 2x VEC at a time. */ ++ movaps (VEC_SIZE * 2)(%rdi), %xmm4 ++ movaps (VEC_SIZE * 3)(%rdi), %xmm5 ++ /* Since SSE2 no pminud so wcsrchr needs seperate logic for ++ detecting zero. Note if this is found to be a bottleneck it ++ may be worth adding an SSE4.1 wcsrchr implementation. */ ++#ifdef USE_AS_WCSRCHR ++ movaps %xmm5, %xmm6 ++ pxor %xmm8, %xmm8 ++ ++ PCMPEQ %xmm8, %xmm5 ++ PCMPEQ %xmm4, %xmm8 ++ por %xmm5, %xmm8 ++#else ++ movaps %xmm5, %xmm6 ++ PMINU %xmm4, %xmm5 ++#endif ++ ++ movaps %xmm4, %xmm9 ++ PCMPEQ %xmm0, %xmm4 ++ PCMPEQ %xmm0, %xmm6 ++ movaps %xmm6, %xmm7 ++ por %xmm4, %xmm6 ++#ifndef USE_AS_WCSRCHR ++ pxor %xmm8, %xmm8 ++ PCMPEQ %xmm5, %xmm8 ++#endif ++ pmovmskb %xmm8, %ecx ++ pmovmskb %xmm6, %eax ++ ++ addq $(VEC_SIZE * 2), %rdi ++ /* Use `addl` 1) so we can undo it with `subl` and 2) it can ++ macro-fuse with `jz`. */ ++ addl %ecx, %eax ++ jz L(first_loop) ++ ++ /* Check if there is zero match. */ ++ testl %ecx, %ecx ++ jz L(second_loop_match) ++ ++ /* Check if there was a match in last iteration. */ ++ subl %ecx, %eax ++ jnz L(new_match) ++ ++L(first_loop_old_match): ++ PCMPEQ %xmm0, %xmm2 ++ PCMPEQ %xmm0, %xmm3 ++ pmovmskb %xmm2, %ecx ++ pmovmskb %xmm3, %eax ++ addl %eax, %ecx ++ jz L(first_vec_x0_test) ++ /* NB: We could move this shift to before the branch and save a ++ bit of code size / performance on the fall through. The ++ branch leads to the null case which generally seems hotter ++ than char in first 3x VEC. */ ++ sall $16, %eax ++ orl %ecx, %eax ++ ++ bsrl %eax, %eax ++ addq %rsi, %rax ++#ifdef USE_AS_WCSRCHR ++ andq $-CHAR_SIZE, %rax ++#endif ++ ret ++ ++ .p2align 4 ++L(new_match): ++ pxor %xmm6, %xmm6 ++ PCMPEQ %xmm9, %xmm6 ++ pmovmskb %xmm6, %eax ++ sall $16, %ecx ++ orl %eax, %ecx ++ ++ /* We can't reuse either of the old comparisons as since we mask ++ of zeros after first zero (instead of using the full ++ comparison) we can't gurantee no interference between match ++ after end of string and valid match. */ ++ pmovmskb %xmm4, %eax ++ pmovmskb %xmm7, %edx ++ sall $16, %edx ++ orl %edx, %eax ++ ++ leal -1(%ecx), %edx ++ xorl %edx, %ecx ++ andl %ecx, %eax ++ jz L(first_loop_old_match) ++ bsrl %eax, %eax ++ addq %rdi, %rax ++#ifdef USE_AS_WCSRCHR ++ andq $-CHAR_SIZE, %rax ++#endif + ret + ++ /* Save minimum state for getting most recent match. We can ++ throw out all previous work. */ + .p2align 4 +-L(no_c_found): +- movl $1, %esi +- xorl %ecx, %ecx +- jmp L(loop_header) ++L(second_loop_match): ++ movq %rdi, %rsi ++ movaps %xmm4, %xmm2 ++ movaps %xmm7, %xmm3 + + .p2align 4 +-L(exit): +- xorl %eax, %eax ++L(second_loop): ++ movaps (VEC_SIZE * 2)(%rdi), %xmm4 ++ movaps (VEC_SIZE * 3)(%rdi), %xmm5 ++ /* Since SSE2 no pminud so wcsrchr needs seperate logic for ++ detecting zero. Note if this is found to be a bottleneck it ++ may be worth adding an SSE4.1 wcsrchr implementation. 
*/ ++#ifdef USE_AS_WCSRCHR ++ movaps %xmm5, %xmm6 ++ pxor %xmm8, %xmm8 ++ ++ PCMPEQ %xmm8, %xmm5 ++ PCMPEQ %xmm4, %xmm8 ++ por %xmm5, %xmm8 ++#else ++ movaps %xmm5, %xmm6 ++ PMINU %xmm4, %xmm5 ++#endif ++ ++ movaps %xmm4, %xmm9 ++ PCMPEQ %xmm0, %xmm4 ++ PCMPEQ %xmm0, %xmm6 ++ movaps %xmm6, %xmm7 ++ por %xmm4, %xmm6 ++#ifndef USE_AS_WCSRCHR ++ pxor %xmm8, %xmm8 ++ PCMPEQ %xmm5, %xmm8 ++#endif ++ ++ pmovmskb %xmm8, %ecx ++ pmovmskb %xmm6, %eax ++ ++ addq $(VEC_SIZE * 2), %rdi ++ /* Either null term or new occurence of CHAR. */ ++ addl %ecx, %eax ++ jz L(second_loop) ++ ++ /* No null term so much be new occurence of CHAR. */ ++ testl %ecx, %ecx ++ jz L(second_loop_match) ++ ++ ++ subl %ecx, %eax ++ jnz L(second_loop_new_match) ++ ++L(second_loop_old_match): ++ pmovmskb %xmm2, %ecx ++ pmovmskb %xmm3, %eax ++ sall $16, %eax ++ orl %ecx, %eax ++ bsrl %eax, %eax ++ addq %rsi, %rax ++#ifdef USE_AS_WCSRCHR ++ andq $-CHAR_SIZE, %rax ++#endif + ret + + .p2align 4 ++L(second_loop_new_match): ++ pxor %xmm6, %xmm6 ++ PCMPEQ %xmm9, %xmm6 ++ pmovmskb %xmm6, %eax ++ sall $16, %ecx ++ orl %eax, %ecx ++ ++ /* We can't reuse either of the old comparisons as since we mask ++ of zeros after first zero (instead of using the full ++ comparison) we can't gurantee no interference between match ++ after end of string and valid match. */ ++ pmovmskb %xmm4, %eax ++ pmovmskb %xmm7, %edx ++ sall $16, %edx ++ orl %edx, %eax ++ ++ leal -1(%ecx), %edx ++ xorl %edx, %ecx ++ andl %ecx, %eax ++ jz L(second_loop_old_match) ++ bsrl %eax, %eax ++ addq %rdi, %rax ++#ifdef USE_AS_WCSRCHR ++ andq $-CHAR_SIZE, %rax ++#endif ++ ret ++ ++ .p2align 4,, 4 + L(cross_page): +- movq %rdi, %rax +- pxor %xmm0, %xmm0 +- andq $-64, %rax +- movdqu (%rax), %xmm5 +- movdqa %xmm5, %xmm6 +- movdqu 16(%rax), %xmm4 +- pcmpeqb %xmm1, %xmm5 +- pcmpeqb %xmm0, %xmm6 +- movdqu 32(%rax), %xmm3 +- pmovmskb %xmm6, %esi +- movdqa %xmm4, %xmm6 +- movdqu 48(%rax), %xmm2 +- pcmpeqb %xmm1, %xmm4 +- pcmpeqb %xmm0, %xmm6 +- pmovmskb %xmm6, %edx +- movdqa %xmm3, %xmm6 +- pcmpeqb %xmm1, %xmm3 +- pcmpeqb %xmm0, %xmm6 +- pcmpeqb %xmm2, %xmm0 +- salq $16, %rdx +- pmovmskb %xmm3, %r9d +- pmovmskb %xmm6, %r8d +- pmovmskb %xmm0, %ecx +- salq $32, %r9 +- salq $32, %r8 +- pcmpeqb %xmm1, %xmm2 +- orq %r8, %rdx +- salq $48, %rcx +- pmovmskb %xmm5, %r8d +- orq %rsi, %rdx +- pmovmskb %xmm4, %esi +- orq %rcx, %rdx +- pmovmskb %xmm2, %ecx +- salq $16, %rsi +- salq $48, %rcx +- orq %r9, %rsi +- orq %r8, %rsi +- orq %rcx, %rsi ++ movq %rdi, %rsi ++ andq $-VEC_SIZE, %rsi ++ movaps (%rsi), %xmm1 ++ pxor %xmm2, %xmm2 ++ PCMPEQ %xmm1, %xmm2 ++ pmovmskb %xmm2, %edx + movl %edi, %ecx +- subl %eax, %ecx +- shrq %cl, %rdx +- shrq %cl, %rsi +- testq %rdx, %rdx +- je L(loop_header2) +- leaq -1(%rdx), %rax +- xorq %rdx, %rax +- andq %rax, %rsi +- je L(exit) +- bsrq %rsi, %rax ++ andl $(VEC_SIZE - 1), %ecx ++ sarl %cl, %edx ++ jz L(cross_page_continue) ++ PCMPEQ %xmm0, %xmm1 ++ pmovmskb %xmm1, %eax ++ sarl %cl, %eax ++ leal -1(%rdx), %ecx ++ xorl %edx, %ecx ++ andl %ecx, %eax ++ jz L(ret1) ++ bsrl %eax, %eax + addq %rdi, %rax ++#ifdef USE_AS_WCSRCHR ++ andq $-CHAR_SIZE, %rax ++#endif ++L(ret1): + ret +-END (strrchr) ++END(STRRCHR) + +-weak_alias (strrchr, rindex) +-libc_hidden_builtin_def (strrchr) ++#ifndef USE_AS_WCSRCHR ++ weak_alias (STRRCHR, rindex) ++ libc_hidden_builtin_def (STRRCHR) ++#endif +diff --git a/sysdeps/x86_64/wcsrchr.S b/sysdeps/x86_64/wcsrchr.S +index 6b318d3f29de9a9e..9006f2220963d76c 100644 +--- a/sysdeps/x86_64/wcsrchr.S ++++ b/sysdeps/x86_64/wcsrchr.S +@@ -17,266 +17,12 
@@ + License along with the GNU C Library; if not, see + . */ + +-#include + +- .text +-ENTRY (wcsrchr) ++#define USE_AS_WCSRCHR 1 ++#define NO_PMINU 1 + +- movd %rsi, %xmm1 +- mov %rdi, %rcx +- punpckldq %xmm1, %xmm1 +- pxor %xmm2, %xmm2 +- punpckldq %xmm1, %xmm1 +- and $63, %rcx +- cmp $48, %rcx +- ja L(crosscache) ++#ifndef STRRCHR ++# define STRRCHR wcsrchr ++#endif + +- movdqu (%rdi), %xmm0 +- pcmpeqd %xmm0, %xmm2 +- pcmpeqd %xmm1, %xmm0 +- pmovmskb %xmm2, %rcx +- pmovmskb %xmm0, %rax +- add $16, %rdi +- +- test %rax, %rax +- jnz L(unaligned_match1) +- +- test %rcx, %rcx +- jnz L(return_null) +- +- and $-16, %rdi +- xor %r8, %r8 +- jmp L(loop) +- +- .p2align 4 +-L(unaligned_match1): +- test %rcx, %rcx +- jnz L(prolog_find_zero_1) +- +- mov %rax, %r8 +- mov %rdi, %rsi +- and $-16, %rdi +- jmp L(loop) +- +- .p2align 4 +-L(crosscache): +- and $15, %rcx +- and $-16, %rdi +- pxor %xmm3, %xmm3 +- movdqa (%rdi), %xmm0 +- pcmpeqd %xmm0, %xmm3 +- pcmpeqd %xmm1, %xmm0 +- pmovmskb %xmm3, %rdx +- pmovmskb %xmm0, %rax +- shr %cl, %rdx +- shr %cl, %rax +- add $16, %rdi +- +- test %rax, %rax +- jnz L(unaligned_match) +- +- test %rdx, %rdx +- jnz L(return_null) +- +- xor %r8, %r8 +- jmp L(loop) +- +- .p2align 4 +-L(unaligned_match): +- test %rdx, %rdx +- jnz L(prolog_find_zero) +- +- mov %rax, %r8 +- lea (%rdi, %rcx), %rsi +- +-/* Loop start on aligned string. */ +- .p2align 4 +-L(loop): +- movdqa (%rdi), %xmm0 +- pcmpeqd %xmm0, %xmm2 +- add $16, %rdi +- pcmpeqd %xmm1, %xmm0 +- pmovmskb %xmm2, %rcx +- pmovmskb %xmm0, %rax +- or %rax, %rcx +- jnz L(matches) +- +- movdqa (%rdi), %xmm3 +- pcmpeqd %xmm3, %xmm2 +- add $16, %rdi +- pcmpeqd %xmm1, %xmm3 +- pmovmskb %xmm2, %rcx +- pmovmskb %xmm3, %rax +- or %rax, %rcx +- jnz L(matches) +- +- movdqa (%rdi), %xmm4 +- pcmpeqd %xmm4, %xmm2 +- add $16, %rdi +- pcmpeqd %xmm1, %xmm4 +- pmovmskb %xmm2, %rcx +- pmovmskb %xmm4, %rax +- or %rax, %rcx +- jnz L(matches) +- +- movdqa (%rdi), %xmm5 +- pcmpeqd %xmm5, %xmm2 +- add $16, %rdi +- pcmpeqd %xmm1, %xmm5 +- pmovmskb %xmm2, %rcx +- pmovmskb %xmm5, %rax +- or %rax, %rcx +- jz L(loop) +- +- .p2align 4 +-L(matches): +- test %rax, %rax +- jnz L(match) +-L(return_value): +- test %r8, %r8 +- jz L(return_null) +- mov %r8, %rax +- mov %rsi, %rdi +- +- test $15 << 4, %ah +- jnz L(match_fourth_wchar) +- test %ah, %ah +- jnz L(match_third_wchar) +- test $15 << 4, %al +- jnz L(match_second_wchar) +- lea -16(%rdi), %rax +- ret +- +- .p2align 4 +-L(match): +- pmovmskb %xmm2, %rcx +- test %rcx, %rcx +- jnz L(find_zero) +- mov %rax, %r8 +- mov %rdi, %rsi +- jmp L(loop) +- +- .p2align 4 +-L(find_zero): +- test $15, %cl +- jnz L(find_zero_in_first_wchar) +- test %cl, %cl +- jnz L(find_zero_in_second_wchar) +- test $15, %ch +- jnz L(find_zero_in_third_wchar) +- +- and $1 << 13 - 1, %rax +- jz L(return_value) +- +- test $15 << 4, %ah +- jnz L(match_fourth_wchar) +- test %ah, %ah +- jnz L(match_third_wchar) +- test $15 << 4, %al +- jnz L(match_second_wchar) +- lea -16(%rdi), %rax +- ret +- +- .p2align 4 +-L(find_zero_in_first_wchar): +- test $1, %rax +- jz L(return_value) +- lea -16(%rdi), %rax +- ret +- +- .p2align 4 +-L(find_zero_in_second_wchar): +- and $1 << 5 - 1, %rax +- jz L(return_value) +- +- test $15 << 4, %al +- jnz L(match_second_wchar) +- lea -16(%rdi), %rax +- ret +- +- .p2align 4 +-L(find_zero_in_third_wchar): +- and $1 << 9 - 1, %rax +- jz L(return_value) +- +- test %ah, %ah +- jnz L(match_third_wchar) +- test $15 << 4, %al +- jnz L(match_second_wchar) +- lea -16(%rdi), %rax +- ret +- +- .p2align 4 +-L(prolog_find_zero): 
+- add %rcx, %rdi +- mov %rdx, %rcx +-L(prolog_find_zero_1): +- test $15, %cl +- jnz L(prolog_find_zero_in_first_wchar) +- test %cl, %cl +- jnz L(prolog_find_zero_in_second_wchar) +- test $15, %ch +- jnz L(prolog_find_zero_in_third_wchar) +- +- and $1 << 13 - 1, %rax +- jz L(return_null) +- +- test $15 << 4, %ah +- jnz L(match_fourth_wchar) +- test %ah, %ah +- jnz L(match_third_wchar) +- test $15 << 4, %al +- jnz L(match_second_wchar) +- lea -16(%rdi), %rax +- ret +- +- .p2align 4 +-L(prolog_find_zero_in_first_wchar): +- test $1, %rax +- jz L(return_null) +- lea -16(%rdi), %rax +- ret +- +- .p2align 4 +-L(prolog_find_zero_in_second_wchar): +- and $1 << 5 - 1, %rax +- jz L(return_null) +- +- test $15 << 4, %al +- jnz L(match_second_wchar) +- lea -16(%rdi), %rax +- ret +- +- .p2align 4 +-L(prolog_find_zero_in_third_wchar): +- and $1 << 9 - 1, %rax +- jz L(return_null) +- +- test %ah, %ah +- jnz L(match_third_wchar) +- test $15 << 4, %al +- jnz L(match_second_wchar) +- lea -16(%rdi), %rax +- ret +- +- .p2align 4 +-L(match_second_wchar): +- lea -12(%rdi), %rax +- ret +- +- .p2align 4 +-L(match_third_wchar): +- lea -8(%rdi), %rax +- ret +- +- .p2align 4 +-L(match_fourth_wchar): +- lea -4(%rdi), %rax +- ret +- +- .p2align 4 +-L(return_null): +- xor %rax, %rax +- ret +- +-END (wcsrchr) ++#include "../strrchr.S" diff --git a/SOURCES/glibc-upstream-2.34-234.patch b/SOURCES/glibc-upstream-2.34-234.patch new file mode 100644 index 0000000..4b8b07d --- /dev/null +++ b/SOURCES/glibc-upstream-2.34-234.patch @@ -0,0 +1,497 @@ +commit 00f09a14d2818f438959e764834abb3913f2b20a +Author: Noah Goldstein +Date: Thu Apr 21 20:52:29 2022 -0500 + + x86: Optimize {str|wcs}rchr-avx2 + + The new code unrolls the main loop slightly without adding too much + overhead and minimizes the comparisons for the search CHAR. + + Geometric Mean of all benchmarks New / Old: 0.832 + See email for all results. + + Full xcheck passes on x86_64 with and without multiarch enabled. + Reviewed-by: H.J. Lu + + (cherry picked from commit df7e295d18ffa34f629578c0017a9881af7620f6) + +diff --git a/sysdeps/x86_64/multiarch/strrchr-avx2.S b/sysdeps/x86_64/multiarch/strrchr-avx2.S +index 0deba97114d3b83d..b8dec737d5213b25 100644 +--- a/sysdeps/x86_64/multiarch/strrchr-avx2.S ++++ b/sysdeps/x86_64/multiarch/strrchr-avx2.S +@@ -27,9 +27,13 @@ + # ifdef USE_AS_WCSRCHR + # define VPBROADCAST vpbroadcastd + # define VPCMPEQ vpcmpeqd ++# define VPMIN vpminud ++# define CHAR_SIZE 4 + # else + # define VPBROADCAST vpbroadcastb + # define VPCMPEQ vpcmpeqb ++# define VPMIN vpminub ++# define CHAR_SIZE 1 + # endif + + # ifndef VZEROUPPER +@@ -41,196 +45,304 @@ + # endif + + # define VEC_SIZE 32 ++# define PAGE_SIZE 4096 + +- .section SECTION(.text),"ax",@progbits +-ENTRY (STRRCHR) +- movd %esi, %xmm4 +- movl %edi, %ecx ++ .section SECTION(.text), "ax", @progbits ++ENTRY(STRRCHR) ++ movd %esi, %xmm7 ++ movl %edi, %eax + /* Broadcast CHAR to YMM4. */ +- VPBROADCAST %xmm4, %ymm4 ++ VPBROADCAST %xmm7, %ymm7 + vpxor %xmm0, %xmm0, %xmm0 + +- /* Check if we may cross page boundary with one vector load. */ +- andl $(2 * VEC_SIZE - 1), %ecx +- cmpl $VEC_SIZE, %ecx +- ja L(cros_page_boundary) ++ /* Shift here instead of `andl` to save code size (saves a fetch ++ block). 
*/ ++ sall $20, %eax ++ cmpl $((PAGE_SIZE - VEC_SIZE) << 20), %eax ++ ja L(cross_page) + ++L(page_cross_continue): + vmovdqu (%rdi), %ymm1 +- VPCMPEQ %ymm1, %ymm0, %ymm2 +- VPCMPEQ %ymm1, %ymm4, %ymm3 +- vpmovmskb %ymm2, %ecx +- vpmovmskb %ymm3, %eax +- addq $VEC_SIZE, %rdi ++ /* Check end of string match. */ ++ VPCMPEQ %ymm1, %ymm0, %ymm6 ++ vpmovmskb %ymm6, %ecx ++ testl %ecx, %ecx ++ jz L(aligned_more) ++ ++ /* Only check match with search CHAR if needed. */ ++ VPCMPEQ %ymm1, %ymm7, %ymm1 ++ vpmovmskb %ymm1, %eax ++ /* Check if match before first zero. */ ++ blsmskl %ecx, %ecx ++ andl %ecx, %eax ++ jz L(ret0) ++ bsrl %eax, %eax ++ addq %rdi, %rax ++ /* We are off by 3 for wcsrchr if search CHAR is non-zero. If ++ search CHAR is zero we are correct. Either way `andq ++ -CHAR_SIZE, %rax` gets the correct result. */ ++# ifdef USE_AS_WCSRCHR ++ andq $-CHAR_SIZE, %rax ++# endif ++L(ret0): ++L(return_vzeroupper): ++ ZERO_UPPER_VEC_REGISTERS_RETURN ++ ++ /* Returns for first vec x1/x2 have hard coded backward search ++ path for earlier matches. */ ++ .p2align 4,, 10 ++L(first_vec_x1): ++ VPCMPEQ %ymm2, %ymm7, %ymm6 ++ vpmovmskb %ymm6, %eax ++ blsmskl %ecx, %ecx ++ andl %ecx, %eax ++ jnz L(first_vec_x1_return) ++ ++ .p2align 4,, 4 ++L(first_vec_x0_test): ++ VPCMPEQ %ymm1, %ymm7, %ymm6 ++ vpmovmskb %ymm6, %eax ++ testl %eax, %eax ++ jz L(ret1) ++ bsrl %eax, %eax ++ addq %r8, %rax ++# ifdef USE_AS_WCSRCHR ++ andq $-CHAR_SIZE, %rax ++# endif ++L(ret1): ++ VZEROUPPER_RETURN + ++ .p2align 4,, 10 ++L(first_vec_x0_x1_test): ++ VPCMPEQ %ymm2, %ymm7, %ymm6 ++ vpmovmskb %ymm6, %eax ++ /* Check ymm2 for search CHAR match. If no match then check ymm1 ++ before returning. */ + testl %eax, %eax +- jnz L(first_vec) ++ jz L(first_vec_x0_test) ++ .p2align 4,, 4 ++L(first_vec_x1_return): ++ bsrl %eax, %eax ++ leaq 1(%rdi, %rax), %rax ++# ifdef USE_AS_WCSRCHR ++ andq $-CHAR_SIZE, %rax ++# endif ++ VZEROUPPER_RETURN + +- testl %ecx, %ecx +- jnz L(return_null) + +- andq $-VEC_SIZE, %rdi +- xorl %edx, %edx +- jmp L(aligned_loop) ++ .p2align 4,, 10 ++L(first_vec_x2): ++ VPCMPEQ %ymm3, %ymm7, %ymm6 ++ vpmovmskb %ymm6, %eax ++ blsmskl %ecx, %ecx ++ /* If no in-range search CHAR match in ymm3 then need to check ++ ymm1/ymm2 for an earlier match (we delay checking search ++ CHAR matches until needed). */ ++ andl %ecx, %eax ++ jz L(first_vec_x0_x1_test) ++ bsrl %eax, %eax ++ leaq (VEC_SIZE + 1)(%rdi, %rax), %rax ++# ifdef USE_AS_WCSRCHR ++ andq $-CHAR_SIZE, %rax ++# endif ++ VZEROUPPER_RETURN ++ + + .p2align 4 +-L(first_vec): +- /* Check if there is a nul CHAR. */ ++L(aligned_more): ++ /* Save original pointer if match was in VEC 0. */ ++ movq %rdi, %r8 ++ ++ /* Align src. */ ++ orq $(VEC_SIZE - 1), %rdi ++ vmovdqu 1(%rdi), %ymm2 ++ VPCMPEQ %ymm2, %ymm0, %ymm6 ++ vpmovmskb %ymm6, %ecx + testl %ecx, %ecx +- jnz L(char_and_nul_in_first_vec) ++ jnz L(first_vec_x1) + +- /* Remember the match and keep searching. */ +- movl %eax, %edx +- movq %rdi, %rsi +- andq $-VEC_SIZE, %rdi +- jmp L(aligned_loop) ++ vmovdqu (VEC_SIZE + 1)(%rdi), %ymm3 ++ VPCMPEQ %ymm3, %ymm0, %ymm6 ++ vpmovmskb %ymm6, %ecx ++ testl %ecx, %ecx ++ jnz L(first_vec_x2) + ++ /* Save pointer again before realigning. 
*/ ++ movq %rdi, %rsi ++ addq $(VEC_SIZE + 1), %rdi ++ andq $-(VEC_SIZE * 2), %rdi + .p2align 4 +-L(cros_page_boundary): +- andl $(VEC_SIZE - 1), %ecx +- andq $-VEC_SIZE, %rdi +- vmovdqa (%rdi), %ymm1 +- VPCMPEQ %ymm1, %ymm0, %ymm2 +- VPCMPEQ %ymm1, %ymm4, %ymm3 +- vpmovmskb %ymm2, %edx +- vpmovmskb %ymm3, %eax +- shrl %cl, %edx +- shrl %cl, %eax +- addq $VEC_SIZE, %rdi +- +- /* Check if there is a CHAR. */ ++L(first_aligned_loop): ++ /* Do 2x VEC at a time. Any more and the cost of finding the ++ match outweights loop benefit. */ ++ vmovdqa (VEC_SIZE * 0)(%rdi), %ymm4 ++ vmovdqa (VEC_SIZE * 1)(%rdi), %ymm5 ++ ++ VPCMPEQ %ymm4, %ymm7, %ymm6 ++ VPMIN %ymm4, %ymm5, %ymm8 ++ VPCMPEQ %ymm5, %ymm7, %ymm10 ++ vpor %ymm6, %ymm10, %ymm5 ++ VPCMPEQ %ymm8, %ymm0, %ymm8 ++ vpor %ymm5, %ymm8, %ymm9 ++ ++ vpmovmskb %ymm9, %eax ++ addq $(VEC_SIZE * 2), %rdi ++ /* No zero or search CHAR. */ + testl %eax, %eax +- jnz L(found_char) +- +- testl %edx, %edx +- jnz L(return_null) ++ jz L(first_aligned_loop) + +- jmp L(aligned_loop) +- +- .p2align 4 +-L(found_char): +- testl %edx, %edx +- jnz L(char_and_nul) ++ /* If no zero CHAR then go to second loop (this allows us to ++ throw away all prior work). */ ++ vpmovmskb %ymm8, %ecx ++ testl %ecx, %ecx ++ jz L(second_aligned_loop_prep) + +- /* Remember the match and keep searching. */ +- movl %eax, %edx +- leaq (%rdi, %rcx), %rsi ++ /* Search char could be zero so we need to get the true match. ++ */ ++ vpmovmskb %ymm5, %eax ++ testl %eax, %eax ++ jnz L(first_aligned_loop_return) + +- .p2align 4 +-L(aligned_loop): +- vmovdqa (%rdi), %ymm1 +- VPCMPEQ %ymm1, %ymm0, %ymm2 +- addq $VEC_SIZE, %rdi +- VPCMPEQ %ymm1, %ymm4, %ymm3 +- vpmovmskb %ymm2, %ecx +- vpmovmskb %ymm3, %eax +- orl %eax, %ecx +- jnz L(char_nor_null) +- +- vmovdqa (%rdi), %ymm1 +- VPCMPEQ %ymm1, %ymm0, %ymm2 +- add $VEC_SIZE, %rdi +- VPCMPEQ %ymm1, %ymm4, %ymm3 +- vpmovmskb %ymm2, %ecx ++ .p2align 4,, 4 ++L(first_vec_x1_or_x2): ++ VPCMPEQ %ymm3, %ymm7, %ymm3 ++ VPCMPEQ %ymm2, %ymm7, %ymm2 + vpmovmskb %ymm3, %eax +- orl %eax, %ecx +- jnz L(char_nor_null) +- +- vmovdqa (%rdi), %ymm1 +- VPCMPEQ %ymm1, %ymm0, %ymm2 +- addq $VEC_SIZE, %rdi +- VPCMPEQ %ymm1, %ymm4, %ymm3 +- vpmovmskb %ymm2, %ecx +- vpmovmskb %ymm3, %eax +- orl %eax, %ecx +- jnz L(char_nor_null) +- +- vmovdqa (%rdi), %ymm1 +- VPCMPEQ %ymm1, %ymm0, %ymm2 +- addq $VEC_SIZE, %rdi +- VPCMPEQ %ymm1, %ymm4, %ymm3 +- vpmovmskb %ymm2, %ecx +- vpmovmskb %ymm3, %eax +- orl %eax, %ecx +- jz L(aligned_loop) +- +- .p2align 4 +-L(char_nor_null): +- /* Find a CHAR or a nul CHAR in a loop. */ +- testl %eax, %eax +- jnz L(match) +-L(return_value): +- testl %edx, %edx +- jz L(return_null) +- movl %edx, %eax +- movq %rsi, %rdi ++ vpmovmskb %ymm2, %edx ++ /* Use add for macro-fusion. */ ++ addq %rax, %rdx ++ jz L(first_vec_x0_test) ++ /* NB: We could move this shift to before the branch and save a ++ bit of code size / performance on the fall through. The ++ branch leads to the null case which generally seems hotter ++ than char in first 3x VEC. 
*/ ++ salq $32, %rax ++ addq %rdx, %rax ++ bsrq %rax, %rax ++ leaq 1(%rsi, %rax), %rax ++# ifdef USE_AS_WCSRCHR ++ andq $-CHAR_SIZE, %rax ++# endif ++ VZEROUPPER_RETURN + ++ .p2align 4,, 8 ++L(first_aligned_loop_return): ++ VPCMPEQ %ymm4, %ymm0, %ymm4 ++ vpmovmskb %ymm4, %edx ++ salq $32, %rcx ++ orq %rdx, %rcx ++ ++ vpmovmskb %ymm10, %eax ++ vpmovmskb %ymm6, %edx ++ salq $32, %rax ++ orq %rdx, %rax ++ blsmskq %rcx, %rcx ++ andq %rcx, %rax ++ jz L(first_vec_x1_or_x2) ++ ++ bsrq %rax, %rax ++ leaq -(VEC_SIZE * 2)(%rdi, %rax), %rax + # ifdef USE_AS_WCSRCHR +- /* Keep the first bit for each matching CHAR for bsr. */ +- andl $0x11111111, %eax ++ andq $-CHAR_SIZE, %rax + # endif +- bsrl %eax, %eax +- leaq -VEC_SIZE(%rdi, %rax), %rax +-L(return_vzeroupper): +- ZERO_UPPER_VEC_REGISTERS_RETURN ++ VZEROUPPER_RETURN + ++ /* Search char cannot be zero. */ + .p2align 4 +-L(match): +- /* Find a CHAR. Check if there is a nul CHAR. */ +- vpmovmskb %ymm2, %ecx +- testl %ecx, %ecx +- jnz L(find_nul) +- +- /* Remember the match and keep searching. */ +- movl %eax, %edx ++L(second_aligned_loop_set_furthest_match): ++ /* Save VEC and pointer from most recent match. */ ++L(second_aligned_loop_prep): + movq %rdi, %rsi +- jmp L(aligned_loop) ++ vmovdqu %ymm6, %ymm2 ++ vmovdqu %ymm10, %ymm3 + + .p2align 4 +-L(find_nul): +-# ifdef USE_AS_WCSRCHR +- /* Keep the first bit for each matching CHAR for bsr. */ +- andl $0x11111111, %ecx +- andl $0x11111111, %eax +-# endif +- /* Mask out any matching bits after the nul CHAR. */ +- movl %ecx, %r8d +- subl $1, %r8d +- xorl %ecx, %r8d +- andl %r8d, %eax ++L(second_aligned_loop): ++ /* Search 2x at at time. */ ++ vmovdqa (VEC_SIZE * 0)(%rdi), %ymm4 ++ vmovdqa (VEC_SIZE * 1)(%rdi), %ymm5 ++ ++ VPCMPEQ %ymm4, %ymm7, %ymm6 ++ VPMIN %ymm4, %ymm5, %ymm1 ++ VPCMPEQ %ymm5, %ymm7, %ymm10 ++ vpor %ymm6, %ymm10, %ymm5 ++ VPCMPEQ %ymm1, %ymm0, %ymm1 ++ vpor %ymm5, %ymm1, %ymm9 ++ ++ vpmovmskb %ymm9, %eax ++ addq $(VEC_SIZE * 2), %rdi + testl %eax, %eax +- /* If there is no CHAR here, return the remembered one. */ +- jz L(return_value) +- bsrl %eax, %eax +- leaq -VEC_SIZE(%rdi, %rax), %rax +- VZEROUPPER_RETURN +- +- .p2align 4 +-L(char_and_nul): +- /* Find both a CHAR and a nul CHAR. */ +- addq %rcx, %rdi +- movl %edx, %ecx +-L(char_and_nul_in_first_vec): +-# ifdef USE_AS_WCSRCHR +- /* Keep the first bit for each matching CHAR for bsr. */ +- andl $0x11111111, %ecx +- andl $0x11111111, %eax +-# endif +- /* Mask out any matching bits after the nul CHAR. */ +- movl %ecx, %r8d +- subl $1, %r8d +- xorl %ecx, %r8d +- andl %r8d, %eax ++ jz L(second_aligned_loop) ++ vpmovmskb %ymm1, %ecx ++ testl %ecx, %ecx ++ jz L(second_aligned_loop_set_furthest_match) ++ vpmovmskb %ymm5, %eax + testl %eax, %eax +- /* Return null pointer if the nul CHAR comes first. */ +- jz L(return_null) +- bsrl %eax, %eax +- leaq -VEC_SIZE(%rdi, %rax), %rax ++ jnz L(return_new_match) ++ ++ /* This is the hot patch. We know CHAR is inbounds and that ++ ymm3/ymm2 have latest match. */ ++ .p2align 4,, 4 ++L(return_old_match): ++ vpmovmskb %ymm3, %eax ++ vpmovmskb %ymm2, %edx ++ salq $32, %rax ++ orq %rdx, %rax ++ bsrq %rax, %rax ++ /* Search char cannot be zero so safe to just use lea for ++ wcsrchr. */ ++ leaq (VEC_SIZE * -2 -(CHAR_SIZE - 1))(%rsi, %rax), %rax + VZEROUPPER_RETURN + +- .p2align 4 +-L(return_null): +- xorl %eax, %eax ++ /* Last iteration also potentially has a match. 
*/ ++ .p2align 4,, 8 ++L(return_new_match): ++ VPCMPEQ %ymm4, %ymm0, %ymm4 ++ vpmovmskb %ymm4, %edx ++ salq $32, %rcx ++ orq %rdx, %rcx ++ ++ vpmovmskb %ymm10, %eax ++ vpmovmskb %ymm6, %edx ++ salq $32, %rax ++ orq %rdx, %rax ++ blsmskq %rcx, %rcx ++ andq %rcx, %rax ++ jz L(return_old_match) ++ bsrq %rax, %rax ++ /* Search char cannot be zero so safe to just use lea for ++ wcsrchr. */ ++ leaq (VEC_SIZE * -2 -(CHAR_SIZE - 1))(%rdi, %rax), %rax + VZEROUPPER_RETURN + +-END (STRRCHR) ++ .p2align 4,, 4 ++L(cross_page): ++ movq %rdi, %rsi ++ andq $-VEC_SIZE, %rsi ++ vmovdqu (%rsi), %ymm1 ++ VPCMPEQ %ymm1, %ymm0, %ymm6 ++ vpmovmskb %ymm6, %ecx ++ /* Shift out zero CHAR matches that are before the begining of ++ src (rdi). */ ++ shrxl %edi, %ecx, %ecx ++ testl %ecx, %ecx ++ jz L(page_cross_continue) ++ VPCMPEQ %ymm1, %ymm7, %ymm1 ++ vpmovmskb %ymm1, %eax ++ ++ /* Shift out search CHAR matches that are before the begining of ++ src (rdi). */ ++ shrxl %edi, %eax, %eax ++ blsmskl %ecx, %ecx ++ /* Check if any search CHAR match in range. */ ++ andl %ecx, %eax ++ jz L(ret2) ++ bsrl %eax, %eax ++ addq %rdi, %rax ++# ifdef USE_AS_WCSRCHR ++ andq $-CHAR_SIZE, %rax ++# endif ++L(ret2): ++ VZEROUPPER_RETURN ++END(STRRCHR) + #endif diff --git a/SOURCES/glibc-upstream-2.34-235.patch b/SOURCES/glibc-upstream-2.34-235.patch new file mode 100644 index 0000000..c3ca959 --- /dev/null +++ b/SOURCES/glibc-upstream-2.34-235.patch @@ -0,0 +1,554 @@ +commit 596c9a32cc5d5eb82587e92d1e66c9ecb7668456 +Author: Noah Goldstein +Date: Thu Apr 21 20:52:30 2022 -0500 + + x86: Optimize {str|wcs}rchr-evex + + The new code unrolls the main loop slightly without adding too much + overhead and minimizes the comparisons for the search CHAR. + + Geometric Mean of all benchmarks New / Old: 0.755 + See email for all results. + + Full xcheck passes on x86_64 with and without multiarch enabled. + Reviewed-by: H.J. 
Lu + + (cherry picked from commit c966099cdc3e0fdf92f63eac09b22fa7e5f5f02d) + +diff --git a/sysdeps/x86_64/multiarch/strrchr-evex.S b/sysdeps/x86_64/multiarch/strrchr-evex.S +index f920b5a584edd293..f5b6d755ceb85ae2 100644 +--- a/sysdeps/x86_64/multiarch/strrchr-evex.S ++++ b/sysdeps/x86_64/multiarch/strrchr-evex.S +@@ -24,242 +24,351 @@ + # define STRRCHR __strrchr_evex + # endif + +-# define VMOVU vmovdqu64 +-# define VMOVA vmovdqa64 ++# define VMOVU vmovdqu64 ++# define VMOVA vmovdqa64 + + # ifdef USE_AS_WCSRCHR ++# define SHIFT_REG esi ++ ++# define kunpck kunpckbw ++# define kmov_2x kmovd ++# define maskz_2x ecx ++# define maskm_2x eax ++# define CHAR_SIZE 4 ++# define VPMIN vpminud ++# define VPTESTN vptestnmd + # define VPBROADCAST vpbroadcastd +-# define VPCMP vpcmpd +-# define SHIFT_REG r8d ++# define VPCMP vpcmpd + # else ++# define SHIFT_REG edi ++ ++# define kunpck kunpckdq ++# define kmov_2x kmovq ++# define maskz_2x rcx ++# define maskm_2x rax ++ ++# define CHAR_SIZE 1 ++# define VPMIN vpminub ++# define VPTESTN vptestnmb + # define VPBROADCAST vpbroadcastb +-# define VPCMP vpcmpb +-# define SHIFT_REG ecx ++# define VPCMP vpcmpb + # endif + + # define XMMZERO xmm16 + # define YMMZERO ymm16 + # define YMMMATCH ymm17 +-# define YMM1 ymm18 ++# define YMMSAVE ymm18 ++ ++# define YMM1 ymm19 ++# define YMM2 ymm20 ++# define YMM3 ymm21 ++# define YMM4 ymm22 ++# define YMM5 ymm23 ++# define YMM6 ymm24 ++# define YMM7 ymm25 ++# define YMM8 ymm26 + +-# define VEC_SIZE 32 + +- .section .text.evex,"ax",@progbits +-ENTRY (STRRCHR) +- movl %edi, %ecx ++# define VEC_SIZE 32 ++# define PAGE_SIZE 4096 ++ .section .text.evex, "ax", @progbits ++ENTRY(STRRCHR) ++ movl %edi, %eax + /* Broadcast CHAR to YMMMATCH. */ + VPBROADCAST %esi, %YMMMATCH + +- vpxorq %XMMZERO, %XMMZERO, %XMMZERO +- +- /* Check if we may cross page boundary with one vector load. */ +- andl $(2 * VEC_SIZE - 1), %ecx +- cmpl $VEC_SIZE, %ecx +- ja L(cros_page_boundary) ++ andl $(PAGE_SIZE - 1), %eax ++ cmpl $(PAGE_SIZE - VEC_SIZE), %eax ++ jg L(cross_page_boundary) + ++L(page_cross_continue): + VMOVU (%rdi), %YMM1 +- +- /* Each bit in K0 represents a null byte in YMM1. */ +- VPCMP $0, %YMMZERO, %YMM1, %k0 +- /* Each bit in K1 represents a CHAR in YMM1. */ +- VPCMP $0, %YMMMATCH, %YMM1, %k1 ++ /* k0 has a 1 for each zero CHAR in YMM1. */ ++ VPTESTN %YMM1, %YMM1, %k0 + kmovd %k0, %ecx +- kmovd %k1, %eax +- +- addq $VEC_SIZE, %rdi +- +- testl %eax, %eax +- jnz L(first_vec) +- + testl %ecx, %ecx +- jnz L(return_null) +- +- andq $-VEC_SIZE, %rdi +- xorl %edx, %edx +- jmp L(aligned_loop) +- +- .p2align 4 +-L(first_vec): +- /* Check if there is a null byte. */ +- testl %ecx, %ecx +- jnz L(char_and_nul_in_first_vec) +- +- /* Remember the match and keep searching. */ +- movl %eax, %edx +- movq %rdi, %rsi +- andq $-VEC_SIZE, %rdi +- jmp L(aligned_loop) +- +- .p2align 4 +-L(cros_page_boundary): +- andl $(VEC_SIZE - 1), %ecx +- andq $-VEC_SIZE, %rdi ++ jz L(aligned_more) ++ /* fallthrough: zero CHAR in first VEC. */ + ++ /* K1 has a 1 for each search CHAR match in YMM1. */ ++ VPCMP $0, %YMMMATCH, %YMM1, %k1 ++ kmovd %k1, %eax ++ /* Build mask up until first zero CHAR (used to mask of ++ potential search CHAR matches past the end of the string). ++ */ ++ blsmskl %ecx, %ecx ++ andl %ecx, %eax ++ jz L(ret0) ++ /* Get last match (the `andl` removed any out of bounds ++ matches). */ ++ bsrl %eax, %eax + # ifdef USE_AS_WCSRCHR +- /* NB: Divide shift count by 4 since each bit in K1 represent 4 +- bytes. 
*/ +- movl %ecx, %SHIFT_REG +- sarl $2, %SHIFT_REG ++ leaq (%rdi, %rax, CHAR_SIZE), %rax ++# else ++ addq %rdi, %rax + # endif ++L(ret0): ++ ret + +- VMOVA (%rdi), %YMM1 +- +- /* Each bit in K0 represents a null byte in YMM1. */ +- VPCMP $0, %YMMZERO, %YMM1, %k0 +- /* Each bit in K1 represents a CHAR in YMM1. */ ++ /* Returns for first vec x1/x2/x3 have hard coded backward ++ search path for earlier matches. */ ++ .p2align 4,, 6 ++L(first_vec_x1): ++ VPCMP $0, %YMMMATCH, %YMM2, %k1 ++ kmovd %k1, %eax ++ blsmskl %ecx, %ecx ++ /* eax non-zero if search CHAR in range. */ ++ andl %ecx, %eax ++ jnz L(first_vec_x1_return) ++ ++ /* fallthrough: no match in YMM2 then need to check for earlier ++ matches (in YMM1). */ ++ .p2align 4,, 4 ++L(first_vec_x0_test): + VPCMP $0, %YMMMATCH, %YMM1, %k1 +- kmovd %k0, %edx + kmovd %k1, %eax +- +- shrxl %SHIFT_REG, %edx, %edx +- shrxl %SHIFT_REG, %eax, %eax +- addq $VEC_SIZE, %rdi +- +- /* Check if there is a CHAR. */ + testl %eax, %eax +- jnz L(found_char) +- +- testl %edx, %edx +- jnz L(return_null) +- +- jmp L(aligned_loop) +- +- .p2align 4 +-L(found_char): +- testl %edx, %edx +- jnz L(char_and_nul) +- +- /* Remember the match and keep searching. */ +- movl %eax, %edx +- leaq (%rdi, %rcx), %rsi ++ jz L(ret1) ++ bsrl %eax, %eax ++# ifdef USE_AS_WCSRCHR ++ leaq (%rsi, %rax, CHAR_SIZE), %rax ++# else ++ addq %rsi, %rax ++# endif ++L(ret1): ++ ret + +- .p2align 4 +-L(aligned_loop): +- VMOVA (%rdi), %YMM1 +- addq $VEC_SIZE, %rdi ++ .p2align 4,, 10 ++L(first_vec_x1_or_x2): ++ VPCMP $0, %YMM3, %YMMMATCH, %k3 ++ VPCMP $0, %YMM2, %YMMMATCH, %k2 ++ /* K2 and K3 have 1 for any search CHAR match. Test if any ++ matches between either of them. Otherwise check YMM1. */ ++ kortestd %k2, %k3 ++ jz L(first_vec_x0_test) ++ ++ /* Guranteed that YMM2 and YMM3 are within range so merge the ++ two bitmasks then get last result. */ ++ kunpck %k2, %k3, %k3 ++ kmovq %k3, %rax ++ bsrq %rax, %rax ++ leaq (VEC_SIZE)(%r8, %rax, CHAR_SIZE), %rax ++ ret + +- /* Each bit in K0 represents a null byte in YMM1. */ +- VPCMP $0, %YMMZERO, %YMM1, %k0 +- /* Each bit in K1 represents a CHAR in YMM1. */ +- VPCMP $0, %YMMMATCH, %YMM1, %k1 +- kmovd %k0, %ecx ++ .p2align 4,, 6 ++L(first_vec_x3): ++ VPCMP $0, %YMMMATCH, %YMM4, %k1 + kmovd %k1, %eax +- orl %eax, %ecx +- jnz L(char_nor_null) ++ blsmskl %ecx, %ecx ++ /* If no search CHAR match in range check YMM1/YMM2/YMM3. */ ++ andl %ecx, %eax ++ jz L(first_vec_x1_or_x2) ++ bsrl %eax, %eax ++ leaq (VEC_SIZE * 3)(%rdi, %rax, CHAR_SIZE), %rax ++ ret + +- VMOVA (%rdi), %YMM1 +- add $VEC_SIZE, %rdi ++ .p2align 4,, 6 ++L(first_vec_x0_x1_test): ++ VPCMP $0, %YMMMATCH, %YMM2, %k1 ++ kmovd %k1, %eax ++ /* Check YMM2 for last match first. If no match try YMM1. */ ++ testl %eax, %eax ++ jz L(first_vec_x0_test) ++ .p2align 4,, 4 ++L(first_vec_x1_return): ++ bsrl %eax, %eax ++ leaq (VEC_SIZE)(%rdi, %rax, CHAR_SIZE), %rax ++ ret + +- /* Each bit in K0 represents a null byte in YMM1. */ +- VPCMP $0, %YMMZERO, %YMM1, %k0 +- /* Each bit in K1 represents a CHAR in YMM1. */ +- VPCMP $0, %YMMMATCH, %YMM1, %k1 +- kmovd %k0, %ecx ++ .p2align 4,, 10 ++L(first_vec_x2): ++ VPCMP $0, %YMMMATCH, %YMM3, %k1 + kmovd %k1, %eax +- orl %eax, %ecx +- jnz L(char_nor_null) ++ blsmskl %ecx, %ecx ++ /* Check YMM3 for last match first. If no match try YMM2/YMM1. 
++ */ ++ andl %ecx, %eax ++ jz L(first_vec_x0_x1_test) ++ bsrl %eax, %eax ++ leaq (VEC_SIZE * 2)(%rdi, %rax, CHAR_SIZE), %rax ++ ret + +- VMOVA (%rdi), %YMM1 +- addq $VEC_SIZE, %rdi + +- /* Each bit in K0 represents a null byte in YMM1. */ +- VPCMP $0, %YMMZERO, %YMM1, %k0 +- /* Each bit in K1 represents a CHAR in YMM1. */ +- VPCMP $0, %YMMMATCH, %YMM1, %k1 ++ .p2align 4 ++L(aligned_more): ++ /* Need to keep original pointer incase YMM1 has last match. */ ++ movq %rdi, %rsi ++ andq $-VEC_SIZE, %rdi ++ VMOVU VEC_SIZE(%rdi), %YMM2 ++ VPTESTN %YMM2, %YMM2, %k0 + kmovd %k0, %ecx +- kmovd %k1, %eax +- orl %eax, %ecx +- jnz L(char_nor_null) ++ testl %ecx, %ecx ++ jnz L(first_vec_x1) + +- VMOVA (%rdi), %YMM1 +- addq $VEC_SIZE, %rdi ++ VMOVU (VEC_SIZE * 2)(%rdi), %YMM3 ++ VPTESTN %YMM3, %YMM3, %k0 ++ kmovd %k0, %ecx ++ testl %ecx, %ecx ++ jnz L(first_vec_x2) + +- /* Each bit in K0 represents a null byte in YMM1. */ +- VPCMP $0, %YMMZERO, %YMM1, %k0 +- /* Each bit in K1 represents a CHAR in YMM1. */ +- VPCMP $0, %YMMMATCH, %YMM1, %k1 ++ VMOVU (VEC_SIZE * 3)(%rdi), %YMM4 ++ VPTESTN %YMM4, %YMM4, %k0 + kmovd %k0, %ecx +- kmovd %k1, %eax +- orl %eax, %ecx +- jz L(aligned_loop) ++ movq %rdi, %r8 ++ testl %ecx, %ecx ++ jnz L(first_vec_x3) + ++ andq $-(VEC_SIZE * 2), %rdi + .p2align 4 +-L(char_nor_null): +- /* Find a CHAR or a null byte in a loop. */ ++L(first_aligned_loop): ++ /* Preserve YMM1, YMM2, YMM3, and YMM4 until we can gurantee ++ they don't store a match. */ ++ VMOVA (VEC_SIZE * 4)(%rdi), %YMM5 ++ VMOVA (VEC_SIZE * 5)(%rdi), %YMM6 ++ ++ VPCMP $0, %YMM5, %YMMMATCH, %k2 ++ vpxord %YMM6, %YMMMATCH, %YMM7 ++ ++ VPMIN %YMM5, %YMM6, %YMM8 ++ VPMIN %YMM8, %YMM7, %YMM7 ++ ++ VPTESTN %YMM7, %YMM7, %k1 ++ subq $(VEC_SIZE * -2), %rdi ++ kortestd %k1, %k2 ++ jz L(first_aligned_loop) ++ ++ VPCMP $0, %YMM6, %YMMMATCH, %k3 ++ VPTESTN %YMM8, %YMM8, %k1 ++ ktestd %k1, %k1 ++ jz L(second_aligned_loop_prep) ++ ++ kortestd %k2, %k3 ++ jnz L(return_first_aligned_loop) ++ ++ .p2align 4,, 6 ++L(first_vec_x1_or_x2_or_x3): ++ VPCMP $0, %YMM4, %YMMMATCH, %k4 ++ kmovd %k4, %eax + testl %eax, %eax +- jnz L(match) +-L(return_value): +- testl %edx, %edx +- jz L(return_null) +- movl %edx, %eax +- movq %rsi, %rdi ++ jz L(first_vec_x1_or_x2) + bsrl %eax, %eax +-# ifdef USE_AS_WCSRCHR +- /* NB: Multiply wchar_t count by 4 to get the number of bytes. */ +- leaq -VEC_SIZE(%rdi, %rax, 4), %rax +-# else +- leaq -VEC_SIZE(%rdi, %rax), %rax +-# endif ++ leaq (VEC_SIZE * 3)(%r8, %rax, CHAR_SIZE), %rax + ret + +- .p2align 4 +-L(match): +- /* Find a CHAR. Check if there is a null byte. */ +- kmovd %k0, %ecx +- testl %ecx, %ecx +- jnz L(find_nul) ++ .p2align 4,, 8 ++L(return_first_aligned_loop): ++ VPTESTN %YMM5, %YMM5, %k0 ++ kunpck %k0, %k1, %k0 ++ kmov_2x %k0, %maskz_2x ++ ++ blsmsk %maskz_2x, %maskz_2x ++ kunpck %k2, %k3, %k3 ++ kmov_2x %k3, %maskm_2x ++ and %maskz_2x, %maskm_2x ++ jz L(first_vec_x1_or_x2_or_x3) + +- /* Remember the match and keep searching. */ +- movl %eax, %edx ++ bsr %maskm_2x, %maskm_2x ++ leaq (VEC_SIZE * 2)(%rdi, %rax, CHAR_SIZE), %rax ++ ret ++ ++ .p2align 4 ++ /* We can throw away the work done for the first 4x checks here ++ as we have a later match. This is the 'fast' path persay. ++ */ ++L(second_aligned_loop_prep): ++L(second_aligned_loop_set_furthest_match): + movq %rdi, %rsi +- jmp L(aligned_loop) ++ kunpck %k2, %k3, %k4 + + .p2align 4 +-L(find_nul): +- /* Mask out any matching bits after the null byte. 
*/ +- movl %ecx, %r8d +- subl $1, %r8d +- xorl %ecx, %r8d +- andl %r8d, %eax +- testl %eax, %eax +- /* If there is no CHAR here, return the remembered one. */ +- jz L(return_value) +- bsrl %eax, %eax ++L(second_aligned_loop): ++ VMOVU (VEC_SIZE * 4)(%rdi), %YMM1 ++ VMOVU (VEC_SIZE * 5)(%rdi), %YMM2 ++ ++ VPCMP $0, %YMM1, %YMMMATCH, %k2 ++ vpxord %YMM2, %YMMMATCH, %YMM3 ++ ++ VPMIN %YMM1, %YMM2, %YMM4 ++ VPMIN %YMM3, %YMM4, %YMM3 ++ ++ VPTESTN %YMM3, %YMM3, %k1 ++ subq $(VEC_SIZE * -2), %rdi ++ kortestd %k1, %k2 ++ jz L(second_aligned_loop) ++ ++ VPCMP $0, %YMM2, %YMMMATCH, %k3 ++ VPTESTN %YMM4, %YMM4, %k1 ++ ktestd %k1, %k1 ++ jz L(second_aligned_loop_set_furthest_match) ++ ++ kortestd %k2, %k3 ++ /* branch here because there is a significant advantage interms ++ of output dependency chance in using edx. */ ++ jnz L(return_new_match) ++L(return_old_match): ++ kmovq %k4, %rax ++ bsrq %rax, %rax ++ leaq (VEC_SIZE * 2)(%rsi, %rax, CHAR_SIZE), %rax ++ ret ++ ++L(return_new_match): ++ VPTESTN %YMM1, %YMM1, %k0 ++ kunpck %k0, %k1, %k0 ++ kmov_2x %k0, %maskz_2x ++ ++ blsmsk %maskz_2x, %maskz_2x ++ kunpck %k2, %k3, %k3 ++ kmov_2x %k3, %maskm_2x ++ and %maskz_2x, %maskm_2x ++ jz L(return_old_match) ++ ++ bsr %maskm_2x, %maskm_2x ++ leaq (VEC_SIZE * 2)(%rdi, %rax, CHAR_SIZE), %rax ++ ret ++ ++L(cross_page_boundary): ++ /* eax contains all the page offset bits of src (rdi). `xor rdi, ++ rax` sets pointer will all page offset bits cleared so ++ offset of (PAGE_SIZE - VEC_SIZE) will get last aligned VEC ++ before page cross (guranteed to be safe to read). Doing this ++ as opposed to `movq %rdi, %rax; andq $-VEC_SIZE, %rax` saves ++ a bit of code size. */ ++ xorq %rdi, %rax ++ VMOVU (PAGE_SIZE - VEC_SIZE)(%rax), %YMM1 ++ VPTESTN %YMM1, %YMM1, %k0 ++ kmovd %k0, %ecx ++ ++ /* Shift out zero CHAR matches that are before the begining of ++ src (rdi). */ + # ifdef USE_AS_WCSRCHR +- /* NB: Multiply wchar_t count by 4 to get the number of bytes. */ +- leaq -VEC_SIZE(%rdi, %rax, 4), %rax +-# else +- leaq -VEC_SIZE(%rdi, %rax), %rax ++ movl %edi, %esi ++ andl $(VEC_SIZE - 1), %esi ++ shrl $2, %esi + # endif +- ret ++ shrxl %SHIFT_REG, %ecx, %ecx + +- .p2align 4 +-L(char_and_nul): +- /* Find both a CHAR and a null byte. */ +- addq %rcx, %rdi +- movl %edx, %ecx +-L(char_and_nul_in_first_vec): +- /* Mask out any matching bits after the null byte. */ +- movl %ecx, %r8d +- subl $1, %r8d +- xorl %ecx, %r8d +- andl %r8d, %eax +- testl %eax, %eax +- /* Return null pointer if the null byte comes first. */ +- jz L(return_null) ++ testl %ecx, %ecx ++ jz L(page_cross_continue) ++ ++ /* Found zero CHAR so need to test for search CHAR. */ ++ VPCMP $0, %YMMMATCH, %YMM1, %k1 ++ kmovd %k1, %eax ++ /* Shift out search CHAR matches that are before the begining of ++ src (rdi). */ ++ shrxl %SHIFT_REG, %eax, %eax ++ ++ /* Check if any search CHAR match in range. */ ++ blsmskl %ecx, %ecx ++ andl %ecx, %eax ++ jz L(ret3) + bsrl %eax, %eax + # ifdef USE_AS_WCSRCHR +- /* NB: Multiply wchar_t count by 4 to get the number of bytes. 
*/ +- leaq -VEC_SIZE(%rdi, %rax, 4), %rax ++ leaq (%rdi, %rax, CHAR_SIZE), %rax + # else +- leaq -VEC_SIZE(%rdi, %rax), %rax ++ addq %rdi, %rax + # endif ++L(ret3): + ret + +- .p2align 4 +-L(return_null): +- xorl %eax, %eax +- ret +- +-END (STRRCHR) ++END(STRRCHR) + #endif diff --git a/SOURCES/glibc-upstream-2.34-236.patch b/SOURCES/glibc-upstream-2.34-236.patch new file mode 100644 index 0000000..46f4449 --- /dev/null +++ b/SOURCES/glibc-upstream-2.34-236.patch @@ -0,0 +1,35 @@ +commit 1f83d40dfab15a6888759552f24d1b5c0907408b +Author: Florian Weimer +Date: Thu Dec 23 12:24:30 2021 +0100 + + elf: Remove unused NEED_DL_BASE_ADDR and _dl_base_addr + + Reviewed-by: Adhemerval Zanella + (cherry picked from commit cd0c333d2ea82d0ae14719bdbef86d99615bdb00) + +diff --git a/elf/dl-sysdep.c b/elf/dl-sysdep.c +index 4dc366eea445e974..1c78dc89c9cbe54d 100644 +--- a/elf/dl-sysdep.c ++++ b/elf/dl-sysdep.c +@@ -54,9 +54,6 @@ extern char _end[] attribute_hidden; + /* Protect SUID program against misuse of file descriptors. */ + extern void __libc_check_standard_fds (void); + +-#ifdef NEED_DL_BASE_ADDR +-ElfW(Addr) _dl_base_addr; +-#endif + int __libc_enable_secure attribute_relro = 0; + rtld_hidden_data_def (__libc_enable_secure) + /* This variable contains the lowest stack address ever used. */ +@@ -136,11 +133,6 @@ _dl_sysdep_start (void **start_argptr, + case AT_ENTRY: + user_entry = av->a_un.a_val; + break; +-#ifdef NEED_DL_BASE_ADDR +- case AT_BASE: +- _dl_base_addr = av->a_un.a_val; +- break; +-#endif + #ifndef HAVE_AUX_SECURE + case AT_UID: + case AT_EUID: diff --git a/SOURCES/glibc-upstream-2.34-237.patch b/SOURCES/glibc-upstream-2.34-237.patch new file mode 100644 index 0000000..1ea756f --- /dev/null +++ b/SOURCES/glibc-upstream-2.34-237.patch @@ -0,0 +1,751 @@ +commit b0bd6a1323c3eccd16c45bae359a76877fa75639 +Author: Florian Weimer +Date: Thu May 19 11:43:53 2022 +0200 + + elf: Merge dl-sysdep.c into the Linux version + + The generic version is the de-facto Linux implementation. It + requires an auxiliary vector, so Hurd does not use it. + + Reviewed-by: Adhemerval Zanella + (cherry picked from commit 91c0a47ffb66e7cd802de870686465db3b3976a0) + +Conflicts: + elf/dl-sysdep.c + (missing ld.so dependency sorting optimization upstream) + +diff --git a/elf/dl-sysdep.c b/elf/dl-sysdep.c +index 1c78dc89c9cbe54d..7aa90ad6eeb35cad 100644 +--- a/elf/dl-sysdep.c ++++ b/elf/dl-sysdep.c +@@ -1,5 +1,5 @@ +-/* Operating system support for run-time dynamic linker. Generic Unix version. +- Copyright (C) 1995-2021 Free Software Foundation, Inc. ++/* Operating system support for run-time dynamic linker. Stub version. ++ Copyright (C) 1995-2022 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or +@@ -16,352 +16,4 @@ + License along with the GNU C Library; if not, see + . */ + +-/* We conditionalize the whole of this file rather than simply eliding it +- from the static build, because other sysdeps/ versions of this file +- might define things needed by a static build. */ +- +-#ifdef SHARED +- +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include <_itoa.h> +-#include +- +-#include +-#include +-#include +-#include +-#include +-#include +- +-#include +-#include +-#include +- +-extern char **_environ attribute_hidden; +-extern char _end[] attribute_hidden; +- +-/* Protect SUID program against misuse of file descriptors. 
*/ +-extern void __libc_check_standard_fds (void); +- +-int __libc_enable_secure attribute_relro = 0; +-rtld_hidden_data_def (__libc_enable_secure) +-/* This variable contains the lowest stack address ever used. */ +-void *__libc_stack_end attribute_relro = NULL; +-rtld_hidden_data_def(__libc_stack_end) +-void *_dl_random attribute_relro = NULL; +- +-#ifndef DL_FIND_ARG_COMPONENTS +-# define DL_FIND_ARG_COMPONENTS(cookie, argc, argv, envp, auxp) \ +- do { \ +- void **_tmp; \ +- (argc) = *(long int *) cookie; \ +- (argv) = (char **) ((long int *) cookie + 1); \ +- (envp) = (argv) + (argc) + 1; \ +- for (_tmp = (void **) (envp); *_tmp; ++_tmp) \ +- continue; \ +- (auxp) = (void *) ++_tmp; \ +- } while (0) +-#endif +- +-#ifndef DL_STACK_END +-# define DL_STACK_END(cookie) ((void *) (cookie)) +-#endif +- +-ElfW(Addr) +-_dl_sysdep_start (void **start_argptr, +- void (*dl_main) (const ElfW(Phdr) *phdr, ElfW(Word) phnum, +- ElfW(Addr) *user_entry, ElfW(auxv_t) *auxv)) +-{ +- const ElfW(Phdr) *phdr = NULL; +- ElfW(Word) phnum = 0; +- ElfW(Addr) user_entry; +- ElfW(auxv_t) *av; +-#ifdef HAVE_AUX_SECURE +-# define set_seen(tag) (tag) /* Evaluate for the side effects. */ +-# define set_seen_secure() ((void) 0) +-#else +- uid_t uid = 0; +- gid_t gid = 0; +- unsigned int seen = 0; +-# define set_seen_secure() (seen = -1) +-# ifdef HAVE_AUX_XID +-# define set_seen(tag) (tag) /* Evaluate for the side effects. */ +-# else +-# define M(type) (1 << (type)) +-# define set_seen(tag) seen |= M ((tag)->a_type) +-# endif +-#endif +-#ifdef NEED_DL_SYSINFO +- uintptr_t new_sysinfo = 0; +-#endif +- +- __libc_stack_end = DL_STACK_END (start_argptr); +- DL_FIND_ARG_COMPONENTS (start_argptr, _dl_argc, _dl_argv, _environ, +- GLRO(dl_auxv)); +- +- user_entry = (ElfW(Addr)) ENTRY_POINT; +- GLRO(dl_platform) = NULL; /* Default to nothing known about the platform. */ +- +- /* NB: Default to a constant CONSTANT_MINSIGSTKSZ. 
*/ +- _Static_assert (__builtin_constant_p (CONSTANT_MINSIGSTKSZ), +- "CONSTANT_MINSIGSTKSZ is constant"); +- GLRO(dl_minsigstacksize) = CONSTANT_MINSIGSTKSZ; +- +- for (av = GLRO(dl_auxv); av->a_type != AT_NULL; set_seen (av++)) +- switch (av->a_type) +- { +- case AT_PHDR: +- phdr = (void *) av->a_un.a_val; +- break; +- case AT_PHNUM: +- phnum = av->a_un.a_val; +- break; +- case AT_PAGESZ: +- GLRO(dl_pagesize) = av->a_un.a_val; +- break; +- case AT_ENTRY: +- user_entry = av->a_un.a_val; +- break; +-#ifndef HAVE_AUX_SECURE +- case AT_UID: +- case AT_EUID: +- uid ^= av->a_un.a_val; +- break; +- case AT_GID: +- case AT_EGID: +- gid ^= av->a_un.a_val; +- break; +-#endif +- case AT_SECURE: +-#ifndef HAVE_AUX_SECURE +- seen = -1; +-#endif +- __libc_enable_secure = av->a_un.a_val; +- break; +- case AT_PLATFORM: +- GLRO(dl_platform) = (void *) av->a_un.a_val; +- break; +- case AT_HWCAP: +- GLRO(dl_hwcap) = (unsigned long int) av->a_un.a_val; +- break; +- case AT_HWCAP2: +- GLRO(dl_hwcap2) = (unsigned long int) av->a_un.a_val; +- break; +- case AT_CLKTCK: +- GLRO(dl_clktck) = av->a_un.a_val; +- break; +- case AT_FPUCW: +- GLRO(dl_fpu_control) = av->a_un.a_val; +- break; +-#ifdef NEED_DL_SYSINFO +- case AT_SYSINFO: +- new_sysinfo = av->a_un.a_val; +- break; +-#endif +-#ifdef NEED_DL_SYSINFO_DSO +- case AT_SYSINFO_EHDR: +- GLRO(dl_sysinfo_dso) = (void *) av->a_un.a_val; +- break; +-#endif +- case AT_RANDOM: +- _dl_random = (void *) av->a_un.a_val; +- break; +- case AT_MINSIGSTKSZ: +- GLRO(dl_minsigstacksize) = av->a_un.a_val; +- break; +- DL_PLATFORM_AUXV +- } +- +- dl_hwcap_check (); +- +-#ifndef HAVE_AUX_SECURE +- if (seen != -1) +- { +- /* Fill in the values we have not gotten from the kernel through the +- auxiliary vector. */ +-# ifndef HAVE_AUX_XID +-# define SEE(UID, var, uid) \ +- if ((seen & M (AT_##UID)) == 0) var ^= __get##uid () +- SEE (UID, uid, uid); +- SEE (EUID, uid, euid); +- SEE (GID, gid, gid); +- SEE (EGID, gid, egid); +-# endif +- +- /* If one of the two pairs of IDs does not match this is a setuid +- or setgid run. */ +- __libc_enable_secure = uid | gid; +- } +-#endif +- +-#ifndef HAVE_AUX_PAGESIZE +- if (GLRO(dl_pagesize) == 0) +- GLRO(dl_pagesize) = __getpagesize (); +-#endif +- +-#ifdef NEED_DL_SYSINFO +- if (new_sysinfo != 0) +- { +-# ifdef NEED_DL_SYSINFO_DSO +- /* Only set the sysinfo value if we also have the vsyscall DSO. */ +- if (GLRO(dl_sysinfo_dso) != 0) +-# endif +- GLRO(dl_sysinfo) = new_sysinfo; +- } +-#endif +- +- __tunables_init (_environ); +- +- /* Initialize DSO sorting algorithm after tunables. */ +- _dl_sort_maps_init (); +- +-#ifdef DL_SYSDEP_INIT +- DL_SYSDEP_INIT; +-#endif +- +-#ifdef DL_PLATFORM_INIT +- DL_PLATFORM_INIT; +-#endif +- +- /* Determine the length of the platform name. */ +- if (GLRO(dl_platform) != NULL) +- GLRO(dl_platformlen) = strlen (GLRO(dl_platform)); +- +- if (__sbrk (0) == _end) +- /* The dynamic linker was run as a program, and so the initial break +- starts just after our bss, at &_end. The malloc in dl-minimal.c +- will consume the rest of this page, so tell the kernel to move the +- break up that far. When the user program examines its break, it +- will see this new value and not clobber our data. */ +- __sbrk (GLRO(dl_pagesize) +- - ((_end - (char *) 0) & (GLRO(dl_pagesize) - 1))); +- +- /* If this is a SUID program we make sure that FDs 0, 1, and 2 are +- allocated. If necessary we are doing it ourself. If it is not +- possible we stop the program. 
*/ +- if (__builtin_expect (__libc_enable_secure, 0)) +- __libc_check_standard_fds (); +- +- (*dl_main) (phdr, phnum, &user_entry, GLRO(dl_auxv)); +- return user_entry; +-} +- +-void +-_dl_sysdep_start_cleanup (void) +-{ +-} +- +-void +-_dl_show_auxv (void) +-{ +- char buf[64]; +- ElfW(auxv_t) *av; +- +- /* Terminate string. */ +- buf[63] = '\0'; +- +- /* The following code assumes that the AT_* values are encoded +- starting from 0 with AT_NULL, 1 for AT_IGNORE, and all other values +- close by (otherwise the array will be too large). In case we have +- to support a platform where these requirements are not fulfilled +- some alternative implementation has to be used. */ +- for (av = GLRO(dl_auxv); av->a_type != AT_NULL; ++av) +- { +- static const struct +- { +- const char label[22]; +- enum { unknown = 0, dec, hex, str, ignore } form : 8; +- } auxvars[] = +- { +- [AT_EXECFD - 2] = { "EXECFD: ", dec }, +- [AT_EXECFN - 2] = { "EXECFN: ", str }, +- [AT_PHDR - 2] = { "PHDR: 0x", hex }, +- [AT_PHENT - 2] = { "PHENT: ", dec }, +- [AT_PHNUM - 2] = { "PHNUM: ", dec }, +- [AT_PAGESZ - 2] = { "PAGESZ: ", dec }, +- [AT_BASE - 2] = { "BASE: 0x", hex }, +- [AT_FLAGS - 2] = { "FLAGS: 0x", hex }, +- [AT_ENTRY - 2] = { "ENTRY: 0x", hex }, +- [AT_NOTELF - 2] = { "NOTELF: ", hex }, +- [AT_UID - 2] = { "UID: ", dec }, +- [AT_EUID - 2] = { "EUID: ", dec }, +- [AT_GID - 2] = { "GID: ", dec }, +- [AT_EGID - 2] = { "EGID: ", dec }, +- [AT_PLATFORM - 2] = { "PLATFORM: ", str }, +- [AT_HWCAP - 2] = { "HWCAP: ", hex }, +- [AT_CLKTCK - 2] = { "CLKTCK: ", dec }, +- [AT_FPUCW - 2] = { "FPUCW: ", hex }, +- [AT_DCACHEBSIZE - 2] = { "DCACHEBSIZE: 0x", hex }, +- [AT_ICACHEBSIZE - 2] = { "ICACHEBSIZE: 0x", hex }, +- [AT_UCACHEBSIZE - 2] = { "UCACHEBSIZE: 0x", hex }, +- [AT_IGNOREPPC - 2] = { "IGNOREPPC", ignore }, +- [AT_SECURE - 2] = { "SECURE: ", dec }, +- [AT_BASE_PLATFORM - 2] = { "BASE_PLATFORM: ", str }, +- [AT_SYSINFO - 2] = { "SYSINFO: 0x", hex }, +- [AT_SYSINFO_EHDR - 2] = { "SYSINFO_EHDR: 0x", hex }, +- [AT_RANDOM - 2] = { "RANDOM: 0x", hex }, +- [AT_HWCAP2 - 2] = { "HWCAP2: 0x", hex }, +- [AT_MINSIGSTKSZ - 2] = { "MINSIGSTKSZ: ", dec }, +- [AT_L1I_CACHESIZE - 2] = { "L1I_CACHESIZE: ", dec }, +- [AT_L1I_CACHEGEOMETRY - 2] = { "L1I_CACHEGEOMETRY: 0x", hex }, +- [AT_L1D_CACHESIZE - 2] = { "L1D_CACHESIZE: ", dec }, +- [AT_L1D_CACHEGEOMETRY - 2] = { "L1D_CACHEGEOMETRY: 0x", hex }, +- [AT_L2_CACHESIZE - 2] = { "L2_CACHESIZE: ", dec }, +- [AT_L2_CACHEGEOMETRY - 2] = { "L2_CACHEGEOMETRY: 0x", hex }, +- [AT_L3_CACHESIZE - 2] = { "L3_CACHESIZE: ", dec }, +- [AT_L3_CACHEGEOMETRY - 2] = { "L3_CACHEGEOMETRY: 0x", hex }, +- }; +- unsigned int idx = (unsigned int) (av->a_type - 2); +- +- if ((unsigned int) av->a_type < 2u +- || (idx < sizeof (auxvars) / sizeof (auxvars[0]) +- && auxvars[idx].form == ignore)) +- continue; +- +- assert (AT_NULL == 0); +- assert (AT_IGNORE == 1); +- +- /* Some entries are handled in a special way per platform. 
*/ +- if (_dl_procinfo (av->a_type, av->a_un.a_val) == 0) +- continue; +- +- if (idx < sizeof (auxvars) / sizeof (auxvars[0]) +- && auxvars[idx].form != unknown) +- { +- const char *val = (char *) av->a_un.a_val; +- +- if (__builtin_expect (auxvars[idx].form, dec) == dec) +- val = _itoa ((unsigned long int) av->a_un.a_val, +- buf + sizeof buf - 1, 10, 0); +- else if (__builtin_expect (auxvars[idx].form, hex) == hex) +- val = _itoa ((unsigned long int) av->a_un.a_val, +- buf + sizeof buf - 1, 16, 0); +- +- _dl_printf ("AT_%s%s\n", auxvars[idx].label, val); +- +- continue; +- } +- +- /* Unknown value: print a generic line. */ +- char buf2[17]; +- buf2[sizeof (buf2) - 1] = '\0'; +- const char *val2 = _itoa ((unsigned long int) av->a_un.a_val, +- buf2 + sizeof buf2 - 1, 16, 0); +- const char *val = _itoa ((unsigned long int) av->a_type, +- buf + sizeof buf - 1, 16, 0); +- _dl_printf ("AT_??? (0x%s): 0x%s\n", val, val2); +- } +-} +- +-#endif ++#error dl-sysdep support missing. +diff --git a/sysdeps/unix/sysv/linux/dl-sysdep.c b/sysdeps/unix/sysv/linux/dl-sysdep.c +index 144dc5ce5a1bba17..3e41469bcc395179 100644 +--- a/sysdeps/unix/sysv/linux/dl-sysdep.c ++++ b/sysdeps/unix/sysv/linux/dl-sysdep.c +@@ -16,29 +16,352 @@ + License along with the GNU C Library; if not, see + . */ + +-/* Linux needs some special initialization, but otherwise uses +- the generic dynamic linker system interface code. */ +- +-#include ++#include <_itoa.h> ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include + #include +-#include +-#include +-#include ++#include + #include ++#include ++#include + #include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include + + #ifdef SHARED +-# define DL_SYSDEP_INIT frob_brk () ++extern char **_environ attribute_hidden; ++extern char _end[] attribute_hidden; ++ ++/* Protect SUID program against misuse of file descriptors. */ ++extern void __libc_check_standard_fds (void); + +-static inline void +-frob_brk (void) ++int __libc_enable_secure attribute_relro = 0; ++rtld_hidden_data_def (__libc_enable_secure) ++/* This variable contains the lowest stack address ever used. */ ++void *__libc_stack_end attribute_relro = NULL; ++rtld_hidden_data_def(__libc_stack_end) ++void *_dl_random attribute_relro = NULL; ++ ++#ifndef DL_FIND_ARG_COMPONENTS ++# define DL_FIND_ARG_COMPONENTS(cookie, argc, argv, envp, auxp) \ ++ do { \ ++ void **_tmp; \ ++ (argc) = *(long int *) cookie; \ ++ (argv) = (char **) ((long int *) cookie + 1); \ ++ (envp) = (argv) + (argc) + 1; \ ++ for (_tmp = (void **) (envp); *_tmp; ++_tmp) \ ++ continue; \ ++ (auxp) = (void *) ++_tmp; \ ++ } while (0) ++#endif ++ ++#ifndef DL_STACK_END ++# define DL_STACK_END(cookie) ((void *) (cookie)) ++#endif ++ ++ElfW(Addr) ++_dl_sysdep_start (void **start_argptr, ++ void (*dl_main) (const ElfW(Phdr) *phdr, ElfW(Word) phnum, ++ ElfW(Addr) *user_entry, ElfW(auxv_t) *auxv)) + { ++ const ElfW(Phdr) *phdr = NULL; ++ ElfW(Word) phnum = 0; ++ ElfW(Addr) user_entry; ++ ElfW(auxv_t) *av; ++#ifdef HAVE_AUX_SECURE ++# define set_seen(tag) (tag) /* Evaluate for the side effects. */ ++# define set_seen_secure() ((void) 0) ++#else ++ uid_t uid = 0; ++ gid_t gid = 0; ++ unsigned int seen = 0; ++# define set_seen_secure() (seen = -1) ++# ifdef HAVE_AUX_XID ++# define set_seen(tag) (tag) /* Evaluate for the side effects. 
*/ ++# else ++# define M(type) (1 << (type)) ++# define set_seen(tag) seen |= M ((tag)->a_type) ++# endif ++#endif ++#ifdef NEED_DL_SYSINFO ++ uintptr_t new_sysinfo = 0; ++#endif ++ ++ __libc_stack_end = DL_STACK_END (start_argptr); ++ DL_FIND_ARG_COMPONENTS (start_argptr, _dl_argc, _dl_argv, _environ, ++ GLRO(dl_auxv)); ++ ++ user_entry = (ElfW(Addr)) ENTRY_POINT; ++ GLRO(dl_platform) = NULL; /* Default to nothing known about the platform. */ ++ ++ /* NB: Default to a constant CONSTANT_MINSIGSTKSZ. */ ++ _Static_assert (__builtin_constant_p (CONSTANT_MINSIGSTKSZ), ++ "CONSTANT_MINSIGSTKSZ is constant"); ++ GLRO(dl_minsigstacksize) = CONSTANT_MINSIGSTKSZ; ++ ++ for (av = GLRO(dl_auxv); av->a_type != AT_NULL; set_seen (av++)) ++ switch (av->a_type) ++ { ++ case AT_PHDR: ++ phdr = (void *) av->a_un.a_val; ++ break; ++ case AT_PHNUM: ++ phnum = av->a_un.a_val; ++ break; ++ case AT_PAGESZ: ++ GLRO(dl_pagesize) = av->a_un.a_val; ++ break; ++ case AT_ENTRY: ++ user_entry = av->a_un.a_val; ++ break; ++#ifndef HAVE_AUX_SECURE ++ case AT_UID: ++ case AT_EUID: ++ uid ^= av->a_un.a_val; ++ break; ++ case AT_GID: ++ case AT_EGID: ++ gid ^= av->a_un.a_val; ++ break; ++#endif ++ case AT_SECURE: ++#ifndef HAVE_AUX_SECURE ++ seen = -1; ++#endif ++ __libc_enable_secure = av->a_un.a_val; ++ break; ++ case AT_PLATFORM: ++ GLRO(dl_platform) = (void *) av->a_un.a_val; ++ break; ++ case AT_HWCAP: ++ GLRO(dl_hwcap) = (unsigned long int) av->a_un.a_val; ++ break; ++ case AT_HWCAP2: ++ GLRO(dl_hwcap2) = (unsigned long int) av->a_un.a_val; ++ break; ++ case AT_CLKTCK: ++ GLRO(dl_clktck) = av->a_un.a_val; ++ break; ++ case AT_FPUCW: ++ GLRO(dl_fpu_control) = av->a_un.a_val; ++ break; ++#ifdef NEED_DL_SYSINFO ++ case AT_SYSINFO: ++ new_sysinfo = av->a_un.a_val; ++ break; ++#endif ++#ifdef NEED_DL_SYSINFO_DSO ++ case AT_SYSINFO_EHDR: ++ GLRO(dl_sysinfo_dso) = (void *) av->a_un.a_val; ++ break; ++#endif ++ case AT_RANDOM: ++ _dl_random = (void *) av->a_un.a_val; ++ break; ++ case AT_MINSIGSTKSZ: ++ GLRO(dl_minsigstacksize) = av->a_un.a_val; ++ break; ++ DL_PLATFORM_AUXV ++ } ++ ++ dl_hwcap_check (); ++ ++#ifndef HAVE_AUX_SECURE ++ if (seen != -1) ++ { ++ /* Fill in the values we have not gotten from the kernel through the ++ auxiliary vector. */ ++# ifndef HAVE_AUX_XID ++# define SEE(UID, var, uid) \ ++ if ((seen & M (AT_##UID)) == 0) var ^= __get##uid () ++ SEE (UID, uid, uid); ++ SEE (EUID, uid, euid); ++ SEE (GID, gid, gid); ++ SEE (EGID, gid, egid); ++# endif ++ ++ /* If one of the two pairs of IDs does not match this is a setuid ++ or setgid run. */ ++ __libc_enable_secure = uid | gid; ++ } ++#endif ++ ++#ifndef HAVE_AUX_PAGESIZE ++ if (GLRO(dl_pagesize) == 0) ++ GLRO(dl_pagesize) = __getpagesize (); ++#endif ++ ++#ifdef NEED_DL_SYSINFO ++ if (new_sysinfo != 0) ++ { ++# ifdef NEED_DL_SYSINFO_DSO ++ /* Only set the sysinfo value if we also have the vsyscall DSO. */ ++ if (GLRO(dl_sysinfo_dso) != 0) ++# endif ++ GLRO(dl_sysinfo) = new_sysinfo; ++ } ++#endif ++ ++ __tunables_init (_environ); ++ ++ /* Initialize DSO sorting algorithm after tunables. */ ++ _dl_sort_maps_init (); ++ + __brk (0); /* Initialize the break. */ +-} + +-# include ++#ifdef DL_PLATFORM_INIT ++ DL_PLATFORM_INIT; + #endif + ++ /* Determine the length of the platform name. */ ++ if (GLRO(dl_platform) != NULL) ++ GLRO(dl_platformlen) = strlen (GLRO(dl_platform)); ++ ++ if (__sbrk (0) == _end) ++ /* The dynamic linker was run as a program, and so the initial break ++ starts just after our bss, at &_end. 
The malloc in dl-minimal.c ++ will consume the rest of this page, so tell the kernel to move the ++ break up that far. When the user program examines its break, it ++ will see this new value and not clobber our data. */ ++ __sbrk (GLRO(dl_pagesize) ++ - ((_end - (char *) 0) & (GLRO(dl_pagesize) - 1))); ++ ++ /* If this is a SUID program we make sure that FDs 0, 1, and 2 are ++ allocated. If necessary we are doing it ourself. If it is not ++ possible we stop the program. */ ++ if (__builtin_expect (__libc_enable_secure, 0)) ++ __libc_check_standard_fds (); ++ ++ (*dl_main) (phdr, phnum, &user_entry, GLRO(dl_auxv)); ++ return user_entry; ++} ++ ++void ++_dl_sysdep_start_cleanup (void) ++{ ++} ++ ++void ++_dl_show_auxv (void) ++{ ++ char buf[64]; ++ ElfW(auxv_t) *av; ++ ++ /* Terminate string. */ ++ buf[63] = '\0'; ++ ++ /* The following code assumes that the AT_* values are encoded ++ starting from 0 with AT_NULL, 1 for AT_IGNORE, and all other values ++ close by (otherwise the array will be too large). In case we have ++ to support a platform where these requirements are not fulfilled ++ some alternative implementation has to be used. */ ++ for (av = GLRO(dl_auxv); av->a_type != AT_NULL; ++av) ++ { ++ static const struct ++ { ++ const char label[22]; ++ enum { unknown = 0, dec, hex, str, ignore } form : 8; ++ } auxvars[] = ++ { ++ [AT_EXECFD - 2] = { "EXECFD: ", dec }, ++ [AT_EXECFN - 2] = { "EXECFN: ", str }, ++ [AT_PHDR - 2] = { "PHDR: 0x", hex }, ++ [AT_PHENT - 2] = { "PHENT: ", dec }, ++ [AT_PHNUM - 2] = { "PHNUM: ", dec }, ++ [AT_PAGESZ - 2] = { "PAGESZ: ", dec }, ++ [AT_BASE - 2] = { "BASE: 0x", hex }, ++ [AT_FLAGS - 2] = { "FLAGS: 0x", hex }, ++ [AT_ENTRY - 2] = { "ENTRY: 0x", hex }, ++ [AT_NOTELF - 2] = { "NOTELF: ", hex }, ++ [AT_UID - 2] = { "UID: ", dec }, ++ [AT_EUID - 2] = { "EUID: ", dec }, ++ [AT_GID - 2] = { "GID: ", dec }, ++ [AT_EGID - 2] = { "EGID: ", dec }, ++ [AT_PLATFORM - 2] = { "PLATFORM: ", str }, ++ [AT_HWCAP - 2] = { "HWCAP: ", hex }, ++ [AT_CLKTCK - 2] = { "CLKTCK: ", dec }, ++ [AT_FPUCW - 2] = { "FPUCW: ", hex }, ++ [AT_DCACHEBSIZE - 2] = { "DCACHEBSIZE: 0x", hex }, ++ [AT_ICACHEBSIZE - 2] = { "ICACHEBSIZE: 0x", hex }, ++ [AT_UCACHEBSIZE - 2] = { "UCACHEBSIZE: 0x", hex }, ++ [AT_IGNOREPPC - 2] = { "IGNOREPPC", ignore }, ++ [AT_SECURE - 2] = { "SECURE: ", dec }, ++ [AT_BASE_PLATFORM - 2] = { "BASE_PLATFORM: ", str }, ++ [AT_SYSINFO - 2] = { "SYSINFO: 0x", hex }, ++ [AT_SYSINFO_EHDR - 2] = { "SYSINFO_EHDR: 0x", hex }, ++ [AT_RANDOM - 2] = { "RANDOM: 0x", hex }, ++ [AT_HWCAP2 - 2] = { "HWCAP2: 0x", hex }, ++ [AT_MINSIGSTKSZ - 2] = { "MINSIGSTKSZ: ", dec }, ++ [AT_L1I_CACHESIZE - 2] = { "L1I_CACHESIZE: ", dec }, ++ [AT_L1I_CACHEGEOMETRY - 2] = { "L1I_CACHEGEOMETRY: 0x", hex }, ++ [AT_L1D_CACHESIZE - 2] = { "L1D_CACHESIZE: ", dec }, ++ [AT_L1D_CACHEGEOMETRY - 2] = { "L1D_CACHEGEOMETRY: 0x", hex }, ++ [AT_L2_CACHESIZE - 2] = { "L2_CACHESIZE: ", dec }, ++ [AT_L2_CACHEGEOMETRY - 2] = { "L2_CACHEGEOMETRY: 0x", hex }, ++ [AT_L3_CACHESIZE - 2] = { "L3_CACHESIZE: ", dec }, ++ [AT_L3_CACHEGEOMETRY - 2] = { "L3_CACHEGEOMETRY: 0x", hex }, ++ }; ++ unsigned int idx = (unsigned int) (av->a_type - 2); ++ ++ if ((unsigned int) av->a_type < 2u ++ || (idx < sizeof (auxvars) / sizeof (auxvars[0]) ++ && auxvars[idx].form == ignore)) ++ continue; ++ ++ assert (AT_NULL == 0); ++ assert (AT_IGNORE == 1); ++ ++ /* Some entries are handled in a special way per platform. 
*/ ++ if (_dl_procinfo (av->a_type, av->a_un.a_val) == 0) ++ continue; ++ ++ if (idx < sizeof (auxvars) / sizeof (auxvars[0]) ++ && auxvars[idx].form != unknown) ++ { ++ const char *val = (char *) av->a_un.a_val; ++ ++ if (__builtin_expect (auxvars[idx].form, dec) == dec) ++ val = _itoa ((unsigned long int) av->a_un.a_val, ++ buf + sizeof buf - 1, 10, 0); ++ else if (__builtin_expect (auxvars[idx].form, hex) == hex) ++ val = _itoa ((unsigned long int) av->a_un.a_val, ++ buf + sizeof buf - 1, 16, 0); ++ ++ _dl_printf ("AT_%s%s\n", auxvars[idx].label, val); ++ ++ continue; ++ } ++ ++ /* Unknown value: print a generic line. */ ++ char buf2[17]; ++ buf2[sizeof (buf2) - 1] = '\0'; ++ const char *val2 = _itoa ((unsigned long int) av->a_un.a_val, ++ buf2 + sizeof buf2 - 1, 16, 0); ++ const char *val = _itoa ((unsigned long int) av->a_type, ++ buf + sizeof buf - 1, 16, 0); ++ _dl_printf ("AT_??? (0x%s): 0x%s\n", val, val2); ++ } ++} ++ ++#endif /* SHARED */ ++ + + int + attribute_hidden diff --git a/SOURCES/glibc-upstream-2.34-238.patch b/SOURCES/glibc-upstream-2.34-238.patch new file mode 100644 index 0000000..4d07a8a --- /dev/null +++ b/SOURCES/glibc-upstream-2.34-238.patch @@ -0,0 +1,120 @@ +commit 2139b1848e3e0a960ccc615fe1fd78b5d10b1411 +Author: Florian Weimer +Date: Thu Feb 3 10:58:59 2022 +0100 + + Linux: Remove HAVE_AUX_SECURE, HAVE_AUX_XID, HAVE_AUX_PAGESIZE + + They are always defined. + + Reviewed-by: Adhemerval Zanella + (cherry picked from commit b9c3d3382f6f50e9723002deb2dc8127de720fa6) + +diff --git a/sysdeps/unix/sysv/linux/dl-sysdep.c b/sysdeps/unix/sysv/linux/dl-sysdep.c +index 3e41469bcc395179..aae983777ba15fae 100644 +--- a/sysdeps/unix/sysv/linux/dl-sysdep.c ++++ b/sysdeps/unix/sysv/linux/dl-sysdep.c +@@ -85,21 +85,6 @@ _dl_sysdep_start (void **start_argptr, + ElfW(Word) phnum = 0; + ElfW(Addr) user_entry; + ElfW(auxv_t) *av; +-#ifdef HAVE_AUX_SECURE +-# define set_seen(tag) (tag) /* Evaluate for the side effects. */ +-# define set_seen_secure() ((void) 0) +-#else +- uid_t uid = 0; +- gid_t gid = 0; +- unsigned int seen = 0; +-# define set_seen_secure() (seen = -1) +-# ifdef HAVE_AUX_XID +-# define set_seen(tag) (tag) /* Evaluate for the side effects. */ +-# else +-# define M(type) (1 << (type)) +-# define set_seen(tag) seen |= M ((tag)->a_type) +-# endif +-#endif + #ifdef NEED_DL_SYSINFO + uintptr_t new_sysinfo = 0; + #endif +@@ -116,7 +101,7 @@ _dl_sysdep_start (void **start_argptr, + "CONSTANT_MINSIGSTKSZ is constant"); + GLRO(dl_minsigstacksize) = CONSTANT_MINSIGSTKSZ; + +- for (av = GLRO(dl_auxv); av->a_type != AT_NULL; set_seen (av++)) ++ for (av = GLRO(dl_auxv); av->a_type != AT_NULL; av++) + switch (av->a_type) + { + case AT_PHDR: +@@ -131,20 +116,7 @@ _dl_sysdep_start (void **start_argptr, + case AT_ENTRY: + user_entry = av->a_un.a_val; + break; +-#ifndef HAVE_AUX_SECURE +- case AT_UID: +- case AT_EUID: +- uid ^= av->a_un.a_val; +- break; +- case AT_GID: +- case AT_EGID: +- gid ^= av->a_un.a_val; +- break; +-#endif + case AT_SECURE: +-#ifndef HAVE_AUX_SECURE +- seen = -1; +-#endif + __libc_enable_secure = av->a_un.a_val; + break; + case AT_PLATFORM: +@@ -183,31 +155,6 @@ _dl_sysdep_start (void **start_argptr, + + dl_hwcap_check (); + +-#ifndef HAVE_AUX_SECURE +- if (seen != -1) +- { +- /* Fill in the values we have not gotten from the kernel through the +- auxiliary vector. 
*/ +-# ifndef HAVE_AUX_XID +-# define SEE(UID, var, uid) \ +- if ((seen & M (AT_##UID)) == 0) var ^= __get##uid () +- SEE (UID, uid, uid); +- SEE (EUID, uid, euid); +- SEE (GID, gid, gid); +- SEE (EGID, gid, egid); +-# endif +- +- /* If one of the two pairs of IDs does not match this is a setuid +- or setgid run. */ +- __libc_enable_secure = uid | gid; +- } +-#endif +- +-#ifndef HAVE_AUX_PAGESIZE +- if (GLRO(dl_pagesize) == 0) +- GLRO(dl_pagesize) = __getpagesize (); +-#endif +- + #ifdef NEED_DL_SYSINFO + if (new_sysinfo != 0) + { +diff --git a/sysdeps/unix/sysv/linux/ldsodefs.h b/sysdeps/unix/sysv/linux/ldsodefs.h +index 7e01f685b03b984d..0f152c592c2a9b04 100644 +--- a/sysdeps/unix/sysv/linux/ldsodefs.h ++++ b/sysdeps/unix/sysv/linux/ldsodefs.h +@@ -24,16 +24,4 @@ + /* Get the real definitions. */ + #include_next + +-/* We can assume that the kernel always provides the AT_UID, AT_EUID, +- AT_GID, and AT_EGID values in the auxiliary vector from 2.4.0 or so on. */ +-#define HAVE_AUX_XID +- +-/* We can assume that the kernel always provides the AT_SECURE value +- in the auxiliary vector from 2.5.74 or so on. */ +-#define HAVE_AUX_SECURE +- +-/* Starting with one of the 2.4.0 pre-releases the Linux kernel passes +- up the page size information. */ +-#define HAVE_AUX_PAGESIZE +- + #endif /* ldsodefs.h */ diff --git a/SOURCES/glibc-upstream-2.34-239.patch b/SOURCES/glibc-upstream-2.34-239.patch new file mode 100644 index 0000000..ef06d23 --- /dev/null +++ b/SOURCES/glibc-upstream-2.34-239.patch @@ -0,0 +1,55 @@ +commit 458733fffe2c410418b5f633ffd6ed65efd2aac0 +Author: Florian Weimer +Date: Thu Feb 3 10:58:59 2022 +0100 + + Linux: Remove DL_FIND_ARG_COMPONENTS + + The generic definition is always used since the Native Client + port has been removed. + + Reviewed-by: Adhemerval Zanella + (cherry picked from commit 2d47fa68628e831a692cba8fc9050cef435afc5e) + +diff --git a/sysdeps/unix/sysv/linux/dl-sysdep.c b/sysdeps/unix/sysv/linux/dl-sysdep.c +index aae983777ba15fae..e36b3e6b63b1aa7e 100644 +--- a/sysdeps/unix/sysv/linux/dl-sysdep.c ++++ b/sysdeps/unix/sysv/linux/dl-sysdep.c +@@ -59,19 +59,6 @@ void *__libc_stack_end attribute_relro = NULL; + rtld_hidden_data_def(__libc_stack_end) + void *_dl_random attribute_relro = NULL; + +-#ifndef DL_FIND_ARG_COMPONENTS +-# define DL_FIND_ARG_COMPONENTS(cookie, argc, argv, envp, auxp) \ +- do { \ +- void **_tmp; \ +- (argc) = *(long int *) cookie; \ +- (argv) = (char **) ((long int *) cookie + 1); \ +- (envp) = (argv) + (argc) + 1; \ +- for (_tmp = (void **) (envp); *_tmp; ++_tmp) \ +- continue; \ +- (auxp) = (void *) ++_tmp; \ +- } while (0) +-#endif +- + #ifndef DL_STACK_END + # define DL_STACK_END(cookie) ((void *) (cookie)) + #endif +@@ -90,8 +77,16 @@ _dl_sysdep_start (void **start_argptr, + #endif + + __libc_stack_end = DL_STACK_END (start_argptr); +- DL_FIND_ARG_COMPONENTS (start_argptr, _dl_argc, _dl_argv, _environ, +- GLRO(dl_auxv)); ++ _dl_argc = (intptr_t) *start_argptr; ++ _dl_argv = (char **) (start_argptr + 1); /* Necessary aliasing violation. */ ++ _environ = _dl_argv + _dl_argc + 1; ++ for (char **tmp = _environ + 1; ; ++tmp) ++ if (*tmp == NULL) ++ { ++ /* Another necessary aliasing violation. */ ++ GLRO(dl_auxv) = (ElfW(auxv_t) *) (tmp + 1); ++ break; ++ } + + user_entry = (ElfW(Addr)) ENTRY_POINT; + GLRO(dl_platform) = NULL; /* Default to nothing known about the platform. 
*/ diff --git a/SOURCES/glibc-upstream-2.34-240.patch b/SOURCES/glibc-upstream-2.34-240.patch new file mode 100644 index 0000000..d5cec58 --- /dev/null +++ b/SOURCES/glibc-upstream-2.34-240.patch @@ -0,0 +1,70 @@ +commit 08728256faf69b159b9ecd64f7f8b734f5f456e4 +Author: Florian Weimer +Date: Thu Feb 3 10:58:59 2022 +0100 + + Linux: Assume that NEED_DL_SYSINFO_DSO is always defined + + The definition itself is still needed for generic code. + + Reviewed-by: Adhemerval Zanella + (cherry picked from commit f19fc997a5754a6c0bb9e43618f0597e878061f7) + +diff --git a/sysdeps/unix/sysv/linux/dl-sysdep.c b/sysdeps/unix/sysv/linux/dl-sysdep.c +index e36b3e6b63b1aa7e..1829dab4f38b560c 100644 +--- a/sysdeps/unix/sysv/linux/dl-sysdep.c ++++ b/sysdeps/unix/sysv/linux/dl-sysdep.c +@@ -134,11 +134,9 @@ _dl_sysdep_start (void **start_argptr, + new_sysinfo = av->a_un.a_val; + break; + #endif +-#ifdef NEED_DL_SYSINFO_DSO + case AT_SYSINFO_EHDR: + GLRO(dl_sysinfo_dso) = (void *) av->a_un.a_val; + break; +-#endif + case AT_RANDOM: + _dl_random = (void *) av->a_un.a_val; + break; +@@ -153,10 +151,8 @@ _dl_sysdep_start (void **start_argptr, + #ifdef NEED_DL_SYSINFO + if (new_sysinfo != 0) + { +-# ifdef NEED_DL_SYSINFO_DSO + /* Only set the sysinfo value if we also have the vsyscall DSO. */ + if (GLRO(dl_sysinfo_dso) != 0) +-# endif + GLRO(dl_sysinfo) = new_sysinfo; + } + #endif +@@ -309,7 +305,7 @@ int + attribute_hidden + _dl_discover_osversion (void) + { +-#if defined NEED_DL_SYSINFO_DSO && defined SHARED ++#ifdef SHARED + if (GLRO(dl_sysinfo_map) != NULL) + { + /* If the kernel-supplied DSO contains a note indicating the kernel's +@@ -340,7 +336,7 @@ _dl_discover_osversion (void) + } + } + } +-#endif ++#endif /* SHARED */ + + char bufmem[64]; + char *buf = bufmem; +diff --git a/sysdeps/unix/sysv/linux/m68k/sysdep.h b/sysdeps/unix/sysv/linux/m68k/sysdep.h +index b29986339a7e6cc0..11b93f2fa0af0e71 100644 +--- a/sysdeps/unix/sysv/linux/m68k/sysdep.h ++++ b/sysdeps/unix/sysv/linux/m68k/sysdep.h +@@ -301,8 +301,6 @@ SYSCALL_ERROR_LABEL: \ + #define PTR_MANGLE(var) (void) (var) + #define PTR_DEMANGLE(var) (void) (var) + +-#if defined NEED_DL_SYSINFO || defined NEED_DL_SYSINFO_DSO + /* M68K needs system-supplied DSO to access TLS helpers + even when statically linked. */ +-# define NEED_STATIC_SYSINFO_DSO 1 +-#endif ++#define NEED_STATIC_SYSINFO_DSO 1 diff --git a/SOURCES/glibc-upstream-2.34-241.patch b/SOURCES/glibc-upstream-2.34-241.patch new file mode 100644 index 0000000..0d67f1c --- /dev/null +++ b/SOURCES/glibc-upstream-2.34-241.patch @@ -0,0 +1,410 @@ +commit 4b9cd5465d5158dad7b4f0762bc70a3a1209b481 +Author: Florian Weimer +Date: Thu Feb 3 10:58:59 2022 +0100 + + Linux: Consolidate auxiliary vector parsing + + And optimize it slightly. + + The large switch statement in _dl_sysdep_start can be replaced with + a large array. This reduces source code and binary size. 
On + i686-linux-gnu: + + Before: + + text data bss dec hex filename + 7791 12 0 7803 1e7b elf/dl-sysdep.os + + After: + + text data bss dec hex filename + 7135 12 0 7147 1beb elf/dl-sysdep.os + + Reviewed-by: Adhemerval Zanella + (cherry picked from commit 8c8510ab2790039e58995ef3a22309582413d3ff) + +diff --git a/elf/dl-support.c b/elf/dl-support.c +index f29dc965f4d10648..40ef07521336857d 100644 +--- a/elf/dl-support.c ++++ b/elf/dl-support.c +@@ -241,93 +241,21 @@ __rtld_lock_define_initialized_recursive (, _dl_load_tls_lock) + + + #ifdef HAVE_AUX_VECTOR ++#include ++ + int _dl_clktck; + + void + _dl_aux_init (ElfW(auxv_t) *av) + { +- int seen = 0; +- uid_t uid = 0; +- gid_t gid = 0; +- + #ifdef NEED_DL_SYSINFO + /* NB: Avoid RELATIVE relocation in static PIE. */ + GL(dl_sysinfo) = DL_SYSINFO_DEFAULT; + #endif + + _dl_auxv = av; +- for (; av->a_type != AT_NULL; ++av) +- switch (av->a_type) +- { +- case AT_PAGESZ: +- if (av->a_un.a_val != 0) +- GLRO(dl_pagesize) = av->a_un.a_val; +- break; +- case AT_CLKTCK: +- GLRO(dl_clktck) = av->a_un.a_val; +- break; +- case AT_PHDR: +- GL(dl_phdr) = (const void *) av->a_un.a_val; +- break; +- case AT_PHNUM: +- GL(dl_phnum) = av->a_un.a_val; +- break; +- case AT_PLATFORM: +- GLRO(dl_platform) = (void *) av->a_un.a_val; +- break; +- case AT_HWCAP: +- GLRO(dl_hwcap) = (unsigned long int) av->a_un.a_val; +- break; +- case AT_HWCAP2: +- GLRO(dl_hwcap2) = (unsigned long int) av->a_un.a_val; +- break; +- case AT_FPUCW: +- GLRO(dl_fpu_control) = av->a_un.a_val; +- break; +-#ifdef NEED_DL_SYSINFO +- case AT_SYSINFO: +- GL(dl_sysinfo) = av->a_un.a_val; +- break; +-#endif +-#ifdef NEED_DL_SYSINFO_DSO +- case AT_SYSINFO_EHDR: +- GL(dl_sysinfo_dso) = (void *) av->a_un.a_val; +- break; +-#endif +- case AT_UID: +- uid ^= av->a_un.a_val; +- seen |= 1; +- break; +- case AT_EUID: +- uid ^= av->a_un.a_val; +- seen |= 2; +- break; +- case AT_GID: +- gid ^= av->a_un.a_val; +- seen |= 4; +- break; +- case AT_EGID: +- gid ^= av->a_un.a_val; +- seen |= 8; +- break; +- case AT_SECURE: +- seen = -1; +- __libc_enable_secure = av->a_un.a_val; +- __libc_enable_secure_decided = 1; +- break; +- case AT_RANDOM: +- _dl_random = (void *) av->a_un.a_val; +- break; +- case AT_MINSIGSTKSZ: +- _dl_minsigstacksize = av->a_un.a_val; +- break; +- DL_PLATFORM_AUXV +- } +- if (seen == 0xf) +- { +- __libc_enable_secure = uid != 0 || gid != 0; +- __libc_enable_secure_decided = 1; +- } ++ dl_parse_auxv_t auxv_values = { 0, }; ++ _dl_parse_auxv (av, auxv_values); + } + #endif + +diff --git a/sysdeps/unix/sysv/linux/alpha/dl-auxv.h b/sysdeps/unix/sysv/linux/alpha/dl-auxv.h +index 1aa9dca80d189ebe..8c99e776a0af9cef 100644 +--- a/sysdeps/unix/sysv/linux/alpha/dl-auxv.h ++++ b/sysdeps/unix/sysv/linux/alpha/dl-auxv.h +@@ -20,16 +20,8 @@ + + extern long __libc_alpha_cache_shape[4]; + +-#define DL_PLATFORM_AUXV \ +- case AT_L1I_CACHESHAPE: \ +- __libc_alpha_cache_shape[0] = av->a_un.a_val; \ +- break; \ +- case AT_L1D_CACHESHAPE: \ +- __libc_alpha_cache_shape[1] = av->a_un.a_val; \ +- break; \ +- case AT_L2_CACHESHAPE: \ +- __libc_alpha_cache_shape[2] = av->a_un.a_val; \ +- break; \ +- case AT_L3_CACHESHAPE: \ +- __libc_alpha_cache_shape[3] = av->a_un.a_val; \ +- break; ++#define DL_PLATFORM_AUXV \ ++ __libc_alpha_cache_shape[0] = auxv_values[AT_L1I_CACHESHAPE]; \ ++ __libc_alpha_cache_shape[1] = auxv_values[AT_L1D_CACHESHAPE]; \ ++ __libc_alpha_cache_shape[2] = auxv_values[AT_L2_CACHESHAPE]; \ ++ __libc_alpha_cache_shape[3] = auxv_values[AT_L3_CACHESHAPE]; +diff --git 
a/sysdeps/unix/sysv/linux/dl-parse_auxv.h b/sysdeps/unix/sysv/linux/dl-parse_auxv.h +new file mode 100644 +index 0000000000000000..b3d82f69946d6d2c +--- /dev/null ++++ b/sysdeps/unix/sysv/linux/dl-parse_auxv.h +@@ -0,0 +1,61 @@ ++/* Parse the Linux auxiliary vector. ++ Copyright (C) 1995-2022 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++#include ++#include ++#include ++#include ++ ++typedef ElfW(Addr) dl_parse_auxv_t[AT_MINSIGSTKSZ + 1]; ++ ++/* Copy the auxiliary vector into AUX_VALUES and set up GLRO ++ variables. */ ++static inline ++void _dl_parse_auxv (ElfW(auxv_t) *av, dl_parse_auxv_t auxv_values) ++{ ++ auxv_values[AT_ENTRY] = (ElfW(Addr)) ENTRY_POINT; ++ auxv_values[AT_PAGESZ] = EXEC_PAGESIZE; ++ auxv_values[AT_FPUCW] = _FPU_DEFAULT; ++ ++ /* NB: Default to a constant CONSTANT_MINSIGSTKSZ. */ ++ _Static_assert (__builtin_constant_p (CONSTANT_MINSIGSTKSZ), ++ "CONSTANT_MINSIGSTKSZ is constant"); ++ auxv_values[AT_MINSIGSTKSZ] = CONSTANT_MINSIGSTKSZ; ++ ++ for (; av->a_type != AT_NULL; av++) ++ if (av->a_type <= AT_MINSIGSTKSZ) ++ auxv_values[av->a_type] = av->a_un.a_val; ++ ++ GLRO(dl_pagesize) = auxv_values[AT_PAGESZ]; ++ __libc_enable_secure = auxv_values[AT_SECURE]; ++ GLRO(dl_platform) = (void *) auxv_values[AT_PLATFORM]; ++ GLRO(dl_hwcap) = auxv_values[AT_HWCAP]; ++ GLRO(dl_hwcap2) = auxv_values[AT_HWCAP2]; ++ GLRO(dl_clktck) = auxv_values[AT_CLKTCK]; ++ GLRO(dl_fpu_control) = auxv_values[AT_FPUCW]; ++ _dl_random = (void *) auxv_values[AT_RANDOM]; ++ GLRO(dl_minsigstacksize) = auxv_values[AT_MINSIGSTKSZ]; ++ GLRO(dl_sysinfo_dso) = (void *) auxv_values[AT_SYSINFO_EHDR]; ++#ifdef NEED_DL_SYSINFO ++ if (GLRO(dl_sysinfo_dso) != NULL) ++ GLRO(dl_sysinfo) = auxv_values[AT_SYSINFO]; ++#endif ++ ++ DL_PLATFORM_AUXV ++} +diff --git a/sysdeps/unix/sysv/linux/dl-sysdep.c b/sysdeps/unix/sysv/linux/dl-sysdep.c +index 1829dab4f38b560c..80aa9f6f4acb7e3c 100644 +--- a/sysdeps/unix/sysv/linux/dl-sysdep.c ++++ b/sysdeps/unix/sysv/linux/dl-sysdep.c +@@ -21,13 +21,12 @@ + #include + #include + #include ++#include + #include + #include + #include +-#include + #include + #include +-#include + #include + #include + #include +@@ -63,24 +62,24 @@ void *_dl_random attribute_relro = NULL; + # define DL_STACK_END(cookie) ((void *) (cookie)) + #endif + +-ElfW(Addr) +-_dl_sysdep_start (void **start_argptr, +- void (*dl_main) (const ElfW(Phdr) *phdr, ElfW(Word) phnum, +- ElfW(Addr) *user_entry, ElfW(auxv_t) *auxv)) ++/* Arguments passed to dl_main. 
*/ ++struct dl_main_arguments + { +- const ElfW(Phdr) *phdr = NULL; +- ElfW(Word) phnum = 0; ++ const ElfW(Phdr) *phdr; ++ ElfW(Word) phnum; + ElfW(Addr) user_entry; +- ElfW(auxv_t) *av; +-#ifdef NEED_DL_SYSINFO +- uintptr_t new_sysinfo = 0; +-#endif ++}; + +- __libc_stack_end = DL_STACK_END (start_argptr); ++/* Separate function, so that dl_main can be called without the large ++ array on the stack. */ ++static void ++_dl_sysdep_parse_arguments (void **start_argptr, ++ struct dl_main_arguments *args) ++{ + _dl_argc = (intptr_t) *start_argptr; + _dl_argv = (char **) (start_argptr + 1); /* Necessary aliasing violation. */ + _environ = _dl_argv + _dl_argc + 1; +- for (char **tmp = _environ + 1; ; ++tmp) ++ for (char **tmp = _environ; ; ++tmp) + if (*tmp == NULL) + { + /* Another necessary aliasing violation. */ +@@ -88,74 +87,25 @@ _dl_sysdep_start (void **start_argptr, + break; + } + +- user_entry = (ElfW(Addr)) ENTRY_POINT; +- GLRO(dl_platform) = NULL; /* Default to nothing known about the platform. */ ++ dl_parse_auxv_t auxv_values = { 0, }; ++ _dl_parse_auxv (GLRO(dl_auxv), auxv_values); + +- /* NB: Default to a constant CONSTANT_MINSIGSTKSZ. */ +- _Static_assert (__builtin_constant_p (CONSTANT_MINSIGSTKSZ), +- "CONSTANT_MINSIGSTKSZ is constant"); +- GLRO(dl_minsigstacksize) = CONSTANT_MINSIGSTKSZ; ++ args->phdr = (const ElfW(Phdr) *) auxv_values[AT_PHDR]; ++ args->phnum = auxv_values[AT_PHNUM]; ++ args->user_entry = auxv_values[AT_ENTRY]; ++} + +- for (av = GLRO(dl_auxv); av->a_type != AT_NULL; av++) +- switch (av->a_type) +- { +- case AT_PHDR: +- phdr = (void *) av->a_un.a_val; +- break; +- case AT_PHNUM: +- phnum = av->a_un.a_val; +- break; +- case AT_PAGESZ: +- GLRO(dl_pagesize) = av->a_un.a_val; +- break; +- case AT_ENTRY: +- user_entry = av->a_un.a_val; +- break; +- case AT_SECURE: +- __libc_enable_secure = av->a_un.a_val; +- break; +- case AT_PLATFORM: +- GLRO(dl_platform) = (void *) av->a_un.a_val; +- break; +- case AT_HWCAP: +- GLRO(dl_hwcap) = (unsigned long int) av->a_un.a_val; +- break; +- case AT_HWCAP2: +- GLRO(dl_hwcap2) = (unsigned long int) av->a_un.a_val; +- break; +- case AT_CLKTCK: +- GLRO(dl_clktck) = av->a_un.a_val; +- break; +- case AT_FPUCW: +- GLRO(dl_fpu_control) = av->a_un.a_val; +- break; +-#ifdef NEED_DL_SYSINFO +- case AT_SYSINFO: +- new_sysinfo = av->a_un.a_val; +- break; +-#endif +- case AT_SYSINFO_EHDR: +- GLRO(dl_sysinfo_dso) = (void *) av->a_un.a_val; +- break; +- case AT_RANDOM: +- _dl_random = (void *) av->a_un.a_val; +- break; +- case AT_MINSIGSTKSZ: +- GLRO(dl_minsigstacksize) = av->a_un.a_val; +- break; +- DL_PLATFORM_AUXV +- } ++ElfW(Addr) ++_dl_sysdep_start (void **start_argptr, ++ void (*dl_main) (const ElfW(Phdr) *phdr, ElfW(Word) phnum, ++ ElfW(Addr) *user_entry, ElfW(auxv_t) *auxv)) ++{ ++ __libc_stack_end = DL_STACK_END (start_argptr); + +- dl_hwcap_check (); ++ struct dl_main_arguments dl_main_args; ++ _dl_sysdep_parse_arguments (start_argptr, &dl_main_args); + +-#ifdef NEED_DL_SYSINFO +- if (new_sysinfo != 0) +- { +- /* Only set the sysinfo value if we also have the vsyscall DSO. 
*/ +- if (GLRO(dl_sysinfo_dso) != 0) +- GLRO(dl_sysinfo) = new_sysinfo; +- } +-#endif ++ dl_hwcap_check (); + + __tunables_init (_environ); + +@@ -187,8 +137,9 @@ _dl_sysdep_start (void **start_argptr, + if (__builtin_expect (__libc_enable_secure, 0)) + __libc_check_standard_fds (); + +- (*dl_main) (phdr, phnum, &user_entry, GLRO(dl_auxv)); +- return user_entry; ++ (*dl_main) (dl_main_args.phdr, dl_main_args.phnum, ++ &dl_main_args.user_entry, GLRO(dl_auxv)); ++ return dl_main_args.user_entry; + } + + void +diff --git a/sysdeps/unix/sysv/linux/powerpc/dl-auxv.h b/sysdeps/unix/sysv/linux/powerpc/dl-auxv.h +index 36ba0f3e9e45f3e2..7f35fb531ba22098 100644 +--- a/sysdeps/unix/sysv/linux/powerpc/dl-auxv.h ++++ b/sysdeps/unix/sysv/linux/powerpc/dl-auxv.h +@@ -16,15 +16,5 @@ + License along with the GNU C Library; if not, see + . */ + +-#include +- +-#if IS_IN (libc) && !defined SHARED +-int GLRO(dl_cache_line_size); +-#endif +- +-/* Scan the Aux Vector for the "Data Cache Block Size" entry and assign it +- to dl_cache_line_size. */ +-#define DL_PLATFORM_AUXV \ +- case AT_DCACHEBSIZE: \ +- GLRO(dl_cache_line_size) = av->a_un.a_val; \ +- break; ++#define DL_PLATFORM_AUXV \ ++ GLRO(dl_cache_line_size) = auxv_values[AT_DCACHEBSIZE]; +diff --git a/sysdeps/unix/sysv/linux/powerpc/dl-support.c b/sysdeps/unix/sysv/linux/powerpc/dl-support.c +new file mode 100644 +index 0000000000000000..abe68a704946b90f +--- /dev/null ++++ b/sysdeps/unix/sysv/linux/powerpc/dl-support.c +@@ -0,0 +1,4 @@ ++#include ++ ++/* Populated from the auxiliary vector. */ ++int _dl_cache_line_size; diff --git a/SOURCES/glibc-upstream-2.34-242.patch b/SOURCES/glibc-upstream-2.34-242.patch new file mode 100644 index 0000000..a120d5c --- /dev/null +++ b/SOURCES/glibc-upstream-2.34-242.patch @@ -0,0 +1,399 @@ +commit 1cc4ddfeebdb68e0b6de7e4878eef94d3438706f +Author: Florian Weimer +Date: Fri Feb 11 16:01:19 2022 +0100 + + Revert "Linux: Consolidate auxiliary vector parsing" + + This reverts commit 8c8510ab2790039e58995ef3a22309582413d3ff. The + revert is not perfect because the commit included a bug fix for + _dl_sysdep_start with an empty argv, introduced in commit + 2d47fa68628e831a692cba8fc9050cef435afc5e ("Linux: Remove + DL_FIND_ARG_COMPONENTS"), and this bug fix is kept. + + The revert is necessary because the reverted commit introduced an + early memset call on aarch64, which leads to crash due to lack of TCB + initialization. + + (cherry picked from commit d96d2995c1121d3310102afda2deb1f35761b5e6) + +diff --git a/elf/dl-support.c b/elf/dl-support.c +index 40ef07521336857d..f29dc965f4d10648 100644 +--- a/elf/dl-support.c ++++ b/elf/dl-support.c +@@ -241,21 +241,93 @@ __rtld_lock_define_initialized_recursive (, _dl_load_tls_lock) + + + #ifdef HAVE_AUX_VECTOR +-#include +- + int _dl_clktck; + + void + _dl_aux_init (ElfW(auxv_t) *av) + { ++ int seen = 0; ++ uid_t uid = 0; ++ gid_t gid = 0; ++ + #ifdef NEED_DL_SYSINFO + /* NB: Avoid RELATIVE relocation in static PIE. 
*/ + GL(dl_sysinfo) = DL_SYSINFO_DEFAULT; + #endif + + _dl_auxv = av; +- dl_parse_auxv_t auxv_values = { 0, }; +- _dl_parse_auxv (av, auxv_values); ++ for (; av->a_type != AT_NULL; ++av) ++ switch (av->a_type) ++ { ++ case AT_PAGESZ: ++ if (av->a_un.a_val != 0) ++ GLRO(dl_pagesize) = av->a_un.a_val; ++ break; ++ case AT_CLKTCK: ++ GLRO(dl_clktck) = av->a_un.a_val; ++ break; ++ case AT_PHDR: ++ GL(dl_phdr) = (const void *) av->a_un.a_val; ++ break; ++ case AT_PHNUM: ++ GL(dl_phnum) = av->a_un.a_val; ++ break; ++ case AT_PLATFORM: ++ GLRO(dl_platform) = (void *) av->a_un.a_val; ++ break; ++ case AT_HWCAP: ++ GLRO(dl_hwcap) = (unsigned long int) av->a_un.a_val; ++ break; ++ case AT_HWCAP2: ++ GLRO(dl_hwcap2) = (unsigned long int) av->a_un.a_val; ++ break; ++ case AT_FPUCW: ++ GLRO(dl_fpu_control) = av->a_un.a_val; ++ break; ++#ifdef NEED_DL_SYSINFO ++ case AT_SYSINFO: ++ GL(dl_sysinfo) = av->a_un.a_val; ++ break; ++#endif ++#ifdef NEED_DL_SYSINFO_DSO ++ case AT_SYSINFO_EHDR: ++ GL(dl_sysinfo_dso) = (void *) av->a_un.a_val; ++ break; ++#endif ++ case AT_UID: ++ uid ^= av->a_un.a_val; ++ seen |= 1; ++ break; ++ case AT_EUID: ++ uid ^= av->a_un.a_val; ++ seen |= 2; ++ break; ++ case AT_GID: ++ gid ^= av->a_un.a_val; ++ seen |= 4; ++ break; ++ case AT_EGID: ++ gid ^= av->a_un.a_val; ++ seen |= 8; ++ break; ++ case AT_SECURE: ++ seen = -1; ++ __libc_enable_secure = av->a_un.a_val; ++ __libc_enable_secure_decided = 1; ++ break; ++ case AT_RANDOM: ++ _dl_random = (void *) av->a_un.a_val; ++ break; ++ case AT_MINSIGSTKSZ: ++ _dl_minsigstacksize = av->a_un.a_val; ++ break; ++ DL_PLATFORM_AUXV ++ } ++ if (seen == 0xf) ++ { ++ __libc_enable_secure = uid != 0 || gid != 0; ++ __libc_enable_secure_decided = 1; ++ } + } + #endif + +diff --git a/sysdeps/unix/sysv/linux/alpha/dl-auxv.h b/sysdeps/unix/sysv/linux/alpha/dl-auxv.h +index 8c99e776a0af9cef..1aa9dca80d189ebe 100644 +--- a/sysdeps/unix/sysv/linux/alpha/dl-auxv.h ++++ b/sysdeps/unix/sysv/linux/alpha/dl-auxv.h +@@ -20,8 +20,16 @@ + + extern long __libc_alpha_cache_shape[4]; + +-#define DL_PLATFORM_AUXV \ +- __libc_alpha_cache_shape[0] = auxv_values[AT_L1I_CACHESHAPE]; \ +- __libc_alpha_cache_shape[1] = auxv_values[AT_L1D_CACHESHAPE]; \ +- __libc_alpha_cache_shape[2] = auxv_values[AT_L2_CACHESHAPE]; \ +- __libc_alpha_cache_shape[3] = auxv_values[AT_L3_CACHESHAPE]; ++#define DL_PLATFORM_AUXV \ ++ case AT_L1I_CACHESHAPE: \ ++ __libc_alpha_cache_shape[0] = av->a_un.a_val; \ ++ break; \ ++ case AT_L1D_CACHESHAPE: \ ++ __libc_alpha_cache_shape[1] = av->a_un.a_val; \ ++ break; \ ++ case AT_L2_CACHESHAPE: \ ++ __libc_alpha_cache_shape[2] = av->a_un.a_val; \ ++ break; \ ++ case AT_L3_CACHESHAPE: \ ++ __libc_alpha_cache_shape[3] = av->a_un.a_val; \ ++ break; +diff --git a/sysdeps/unix/sysv/linux/dl-parse_auxv.h b/sysdeps/unix/sysv/linux/dl-parse_auxv.h +deleted file mode 100644 +index b3d82f69946d6d2c..0000000000000000 +--- a/sysdeps/unix/sysv/linux/dl-parse_auxv.h ++++ /dev/null +@@ -1,61 +0,0 @@ +-/* Parse the Linux auxiliary vector. +- Copyright (C) 1995-2022 Free Software Foundation, Inc. +- This file is part of the GNU C Library. +- +- The GNU C Library is free software; you can redistribute it and/or +- modify it under the terms of the GNU Lesser General Public +- License as published by the Free Software Foundation; either +- version 2.1 of the License, or (at your option) any later version. 
+- +- The GNU C Library is distributed in the hope that it will be useful, +- but WITHOUT ANY WARRANTY; without even the implied warranty of +- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +- Lesser General Public License for more details. +- +- You should have received a copy of the GNU Lesser General Public +- License along with the GNU C Library; if not, see +- . */ +- +-#include +-#include +-#include +-#include +-#include +- +-typedef ElfW(Addr) dl_parse_auxv_t[AT_MINSIGSTKSZ + 1]; +- +-/* Copy the auxiliary vector into AUX_VALUES and set up GLRO +- variables. */ +-static inline +-void _dl_parse_auxv (ElfW(auxv_t) *av, dl_parse_auxv_t auxv_values) +-{ +- auxv_values[AT_ENTRY] = (ElfW(Addr)) ENTRY_POINT; +- auxv_values[AT_PAGESZ] = EXEC_PAGESIZE; +- auxv_values[AT_FPUCW] = _FPU_DEFAULT; +- +- /* NB: Default to a constant CONSTANT_MINSIGSTKSZ. */ +- _Static_assert (__builtin_constant_p (CONSTANT_MINSIGSTKSZ), +- "CONSTANT_MINSIGSTKSZ is constant"); +- auxv_values[AT_MINSIGSTKSZ] = CONSTANT_MINSIGSTKSZ; +- +- for (; av->a_type != AT_NULL; av++) +- if (av->a_type <= AT_MINSIGSTKSZ) +- auxv_values[av->a_type] = av->a_un.a_val; +- +- GLRO(dl_pagesize) = auxv_values[AT_PAGESZ]; +- __libc_enable_secure = auxv_values[AT_SECURE]; +- GLRO(dl_platform) = (void *) auxv_values[AT_PLATFORM]; +- GLRO(dl_hwcap) = auxv_values[AT_HWCAP]; +- GLRO(dl_hwcap2) = auxv_values[AT_HWCAP2]; +- GLRO(dl_clktck) = auxv_values[AT_CLKTCK]; +- GLRO(dl_fpu_control) = auxv_values[AT_FPUCW]; +- _dl_random = (void *) auxv_values[AT_RANDOM]; +- GLRO(dl_minsigstacksize) = auxv_values[AT_MINSIGSTKSZ]; +- GLRO(dl_sysinfo_dso) = (void *) auxv_values[AT_SYSINFO_EHDR]; +-#ifdef NEED_DL_SYSINFO +- if (GLRO(dl_sysinfo_dso) != NULL) +- GLRO(dl_sysinfo) = auxv_values[AT_SYSINFO]; +-#endif +- +- DL_PLATFORM_AUXV +-} +diff --git a/sysdeps/unix/sysv/linux/dl-sysdep.c b/sysdeps/unix/sysv/linux/dl-sysdep.c +index 80aa9f6f4acb7e3c..facaaba3b9d091b3 100644 +--- a/sysdeps/unix/sysv/linux/dl-sysdep.c ++++ b/sysdeps/unix/sysv/linux/dl-sysdep.c +@@ -21,12 +21,13 @@ + #include + #include + #include +-#include + #include + #include + #include ++#include + #include + #include ++#include + #include + #include + #include +@@ -62,20 +63,20 @@ void *_dl_random attribute_relro = NULL; + # define DL_STACK_END(cookie) ((void *) (cookie)) + #endif + +-/* Arguments passed to dl_main. */ +-struct dl_main_arguments ++ElfW(Addr) ++_dl_sysdep_start (void **start_argptr, ++ void (*dl_main) (const ElfW(Phdr) *phdr, ElfW(Word) phnum, ++ ElfW(Addr) *user_entry, ElfW(auxv_t) *auxv)) + { +- const ElfW(Phdr) *phdr; +- ElfW(Word) phnum; ++ const ElfW(Phdr) *phdr = NULL; ++ ElfW(Word) phnum = 0; + ElfW(Addr) user_entry; +-}; ++ ElfW(auxv_t) *av; ++#ifdef NEED_DL_SYSINFO ++ uintptr_t new_sysinfo = 0; ++#endif + +-/* Separate function, so that dl_main can be called without the large +- array on the stack. */ +-static void +-_dl_sysdep_parse_arguments (void **start_argptr, +- struct dl_main_arguments *args) +-{ ++ __libc_stack_end = DL_STACK_END (start_argptr); + _dl_argc = (intptr_t) *start_argptr; + _dl_argv = (char **) (start_argptr + 1); /* Necessary aliasing violation. */ + _environ = _dl_argv + _dl_argc + 1; +@@ -87,26 +88,75 @@ _dl_sysdep_parse_arguments (void **start_argptr, + break; + } + +- dl_parse_auxv_t auxv_values = { 0, }; +- _dl_parse_auxv (GLRO(dl_auxv), auxv_values); ++ user_entry = (ElfW(Addr)) ENTRY_POINT; ++ GLRO(dl_platform) = NULL; /* Default to nothing known about the platform. 
*/ + +- args->phdr = (const ElfW(Phdr) *) auxv_values[AT_PHDR]; +- args->phnum = auxv_values[AT_PHNUM]; +- args->user_entry = auxv_values[AT_ENTRY]; +-} ++ /* NB: Default to a constant CONSTANT_MINSIGSTKSZ. */ ++ _Static_assert (__builtin_constant_p (CONSTANT_MINSIGSTKSZ), ++ "CONSTANT_MINSIGSTKSZ is constant"); ++ GLRO(dl_minsigstacksize) = CONSTANT_MINSIGSTKSZ; + +-ElfW(Addr) +-_dl_sysdep_start (void **start_argptr, +- void (*dl_main) (const ElfW(Phdr) *phdr, ElfW(Word) phnum, +- ElfW(Addr) *user_entry, ElfW(auxv_t) *auxv)) +-{ +- __libc_stack_end = DL_STACK_END (start_argptr); +- +- struct dl_main_arguments dl_main_args; +- _dl_sysdep_parse_arguments (start_argptr, &dl_main_args); ++ for (av = GLRO(dl_auxv); av->a_type != AT_NULL; av++) ++ switch (av->a_type) ++ { ++ case AT_PHDR: ++ phdr = (void *) av->a_un.a_val; ++ break; ++ case AT_PHNUM: ++ phnum = av->a_un.a_val; ++ break; ++ case AT_PAGESZ: ++ GLRO(dl_pagesize) = av->a_un.a_val; ++ break; ++ case AT_ENTRY: ++ user_entry = av->a_un.a_val; ++ break; ++ case AT_SECURE: ++ __libc_enable_secure = av->a_un.a_val; ++ break; ++ case AT_PLATFORM: ++ GLRO(dl_platform) = (void *) av->a_un.a_val; ++ break; ++ case AT_HWCAP: ++ GLRO(dl_hwcap) = (unsigned long int) av->a_un.a_val; ++ break; ++ case AT_HWCAP2: ++ GLRO(dl_hwcap2) = (unsigned long int) av->a_un.a_val; ++ break; ++ case AT_CLKTCK: ++ GLRO(dl_clktck) = av->a_un.a_val; ++ break; ++ case AT_FPUCW: ++ GLRO(dl_fpu_control) = av->a_un.a_val; ++ break; ++#ifdef NEED_DL_SYSINFO ++ case AT_SYSINFO: ++ new_sysinfo = av->a_un.a_val; ++ break; ++#endif ++ case AT_SYSINFO_EHDR: ++ GLRO(dl_sysinfo_dso) = (void *) av->a_un.a_val; ++ break; ++ case AT_RANDOM: ++ _dl_random = (void *) av->a_un.a_val; ++ break; ++ case AT_MINSIGSTKSZ: ++ GLRO(dl_minsigstacksize) = av->a_un.a_val; ++ break; ++ DL_PLATFORM_AUXV ++ } + + dl_hwcap_check (); + ++#ifdef NEED_DL_SYSINFO ++ if (new_sysinfo != 0) ++ { ++ /* Only set the sysinfo value if we also have the vsyscall DSO. */ ++ if (GLRO(dl_sysinfo_dso) != 0) ++ GLRO(dl_sysinfo) = new_sysinfo; ++ } ++#endif ++ + __tunables_init (_environ); + + /* Initialize DSO sorting algorithm after tunables. */ +@@ -137,9 +187,8 @@ _dl_sysdep_start (void **start_argptr, + if (__builtin_expect (__libc_enable_secure, 0)) + __libc_check_standard_fds (); + +- (*dl_main) (dl_main_args.phdr, dl_main_args.phnum, +- &dl_main_args.user_entry, GLRO(dl_auxv)); +- return dl_main_args.user_entry; ++ (*dl_main) (phdr, phnum, &user_entry, GLRO(dl_auxv)); ++ return user_entry; + } + + void +diff --git a/sysdeps/unix/sysv/linux/powerpc/dl-auxv.h b/sysdeps/unix/sysv/linux/powerpc/dl-auxv.h +index 7f35fb531ba22098..36ba0f3e9e45f3e2 100644 +--- a/sysdeps/unix/sysv/linux/powerpc/dl-auxv.h ++++ b/sysdeps/unix/sysv/linux/powerpc/dl-auxv.h +@@ -16,5 +16,15 @@ + License along with the GNU C Library; if not, see + . */ + +-#define DL_PLATFORM_AUXV \ +- GLRO(dl_cache_line_size) = auxv_values[AT_DCACHEBSIZE]; ++#include ++ ++#if IS_IN (libc) && !defined SHARED ++int GLRO(dl_cache_line_size); ++#endif ++ ++/* Scan the Aux Vector for the "Data Cache Block Size" entry and assign it ++ to dl_cache_line_size. 
*/ ++#define DL_PLATFORM_AUXV \ ++ case AT_DCACHEBSIZE: \ ++ GLRO(dl_cache_line_size) = av->a_un.a_val; \ ++ break; +diff --git a/sysdeps/unix/sysv/linux/powerpc/dl-support.c b/sysdeps/unix/sysv/linux/powerpc/dl-support.c +deleted file mode 100644 +index abe68a704946b90f..0000000000000000 +--- a/sysdeps/unix/sysv/linux/powerpc/dl-support.c ++++ /dev/null +@@ -1,4 +0,0 @@ +-#include +- +-/* Populated from the auxiliary vector. */ +-int _dl_cache_line_size; diff --git a/SOURCES/glibc-upstream-2.34-243.patch b/SOURCES/glibc-upstream-2.34-243.patch new file mode 100644 index 0000000..a9ae285 --- /dev/null +++ b/SOURCES/glibc-upstream-2.34-243.patch @@ -0,0 +1,36 @@ +commit 28bdb03b1b2bdb2d2dc62a9beeaa7d9bd2b10679 +Author: Florian Weimer +Date: Fri Feb 11 19:03:04 2022 +0100 + + Linux: Include in dl-sysdep.c only for SHARED + + Otherwise, on POWER ends up being included twice, + once in dl-sysdep.c, once in dl-support.c. That leads to a linker + failure due to multiple definitions of _dl_cache_line_size. + + Fixes commit d96d2995c1121d3310102afda2deb1f35761b5e6 + ("Revert "Linux: Consolidate auxiliary vector parsing"). + + (cherry picked from commit 098c795e85fbd05c5ef59c2d0ce59529331bea27) + +diff --git a/sysdeps/unix/sysv/linux/dl-sysdep.c b/sysdeps/unix/sysv/linux/dl-sysdep.c +index facaaba3b9d091b3..3487976b06ad7f58 100644 +--- a/sysdeps/unix/sysv/linux/dl-sysdep.c ++++ b/sysdeps/unix/sysv/linux/dl-sysdep.c +@@ -18,7 +18,6 @@ + + #include <_itoa.h> + #include +-#include + #include + #include + #include +@@ -46,6 +45,8 @@ + #include + + #ifdef SHARED ++# include ++ + extern char **_environ attribute_hidden; + extern char _end[] attribute_hidden; + diff --git a/SOURCES/glibc-upstream-2.34-244.patch b/SOURCES/glibc-upstream-2.34-244.patch new file mode 100644 index 0000000..0a4325f --- /dev/null +++ b/SOURCES/glibc-upstream-2.34-244.patch @@ -0,0 +1,439 @@ +commit ff900fad89df7fa12750c018993a12cc02474646 +Author: Florian Weimer +Date: Mon Feb 28 11:50:41 2022 +0100 + + Linux: Consolidate auxiliary vector parsing (redo) + + And optimize it slightly. + + This is commit 8c8510ab2790039e58995ef3a22309582413d3ff revised. + + In _dl_aux_init in elf/dl-support.c, use an explicit loop + and -fno-tree-loop-distribute-patterns to avoid memset. + + Reviewed-by: Szabolcs Nagy + (cherry picked from commit 73fc4e28b9464f0e13edc719a5372839970e7ddb) + +diff --git a/elf/Makefile b/elf/Makefile +index c89a6a58690646ee..6423ebbdd7708a14 100644 +--- a/elf/Makefile ++++ b/elf/Makefile +@@ -148,6 +148,11 @@ ifeq (yes,$(have-loop-to-function)) + CFLAGS-rtld.c += -fno-tree-loop-distribute-patterns + endif + ++ifeq (yes,$(have-loop-to-function)) ++# Likewise, during static library startup, memset is not yet available. ++CFLAGS-dl-support.c = -fno-tree-loop-distribute-patterns ++endif ++ + # Compile rtld itself without stack protection. + # Also compile all routines in the static library that are elided from + # the shared libc because they are in libc.a in the same way. +diff --git a/elf/dl-support.c b/elf/dl-support.c +index f29dc965f4d10648..a2e45e7b14e3a6b9 100644 +--- a/elf/dl-support.c ++++ b/elf/dl-support.c +@@ -43,6 +43,7 @@ + #include + #include + #include ++#include + + extern char *__progname; + char **_dl_argv = &__progname; /* This is checked for some error messages. 
*/ +@@ -241,93 +242,25 @@ __rtld_lock_define_initialized_recursive (, _dl_load_tls_lock) + + + #ifdef HAVE_AUX_VECTOR ++#include ++ + int _dl_clktck; + + void + _dl_aux_init (ElfW(auxv_t) *av) + { +- int seen = 0; +- uid_t uid = 0; +- gid_t gid = 0; +- + #ifdef NEED_DL_SYSINFO + /* NB: Avoid RELATIVE relocation in static PIE. */ + GL(dl_sysinfo) = DL_SYSINFO_DEFAULT; + #endif + + _dl_auxv = av; +- for (; av->a_type != AT_NULL; ++av) +- switch (av->a_type) +- { +- case AT_PAGESZ: +- if (av->a_un.a_val != 0) +- GLRO(dl_pagesize) = av->a_un.a_val; +- break; +- case AT_CLKTCK: +- GLRO(dl_clktck) = av->a_un.a_val; +- break; +- case AT_PHDR: +- GL(dl_phdr) = (const void *) av->a_un.a_val; +- break; +- case AT_PHNUM: +- GL(dl_phnum) = av->a_un.a_val; +- break; +- case AT_PLATFORM: +- GLRO(dl_platform) = (void *) av->a_un.a_val; +- break; +- case AT_HWCAP: +- GLRO(dl_hwcap) = (unsigned long int) av->a_un.a_val; +- break; +- case AT_HWCAP2: +- GLRO(dl_hwcap2) = (unsigned long int) av->a_un.a_val; +- break; +- case AT_FPUCW: +- GLRO(dl_fpu_control) = av->a_un.a_val; +- break; +-#ifdef NEED_DL_SYSINFO +- case AT_SYSINFO: +- GL(dl_sysinfo) = av->a_un.a_val; +- break; +-#endif +-#ifdef NEED_DL_SYSINFO_DSO +- case AT_SYSINFO_EHDR: +- GL(dl_sysinfo_dso) = (void *) av->a_un.a_val; +- break; +-#endif +- case AT_UID: +- uid ^= av->a_un.a_val; +- seen |= 1; +- break; +- case AT_EUID: +- uid ^= av->a_un.a_val; +- seen |= 2; +- break; +- case AT_GID: +- gid ^= av->a_un.a_val; +- seen |= 4; +- break; +- case AT_EGID: +- gid ^= av->a_un.a_val; +- seen |= 8; +- break; +- case AT_SECURE: +- seen = -1; +- __libc_enable_secure = av->a_un.a_val; +- __libc_enable_secure_decided = 1; +- break; +- case AT_RANDOM: +- _dl_random = (void *) av->a_un.a_val; +- break; +- case AT_MINSIGSTKSZ: +- _dl_minsigstacksize = av->a_un.a_val; +- break; +- DL_PLATFORM_AUXV +- } +- if (seen == 0xf) +- { +- __libc_enable_secure = uid != 0 || gid != 0; +- __libc_enable_secure_decided = 1; +- } ++ dl_parse_auxv_t auxv_values; ++ /* Use an explicit initialization loop here because memset may not ++ be available yet. */ ++ for (int i = 0; i < array_length (auxv_values); ++i) ++ auxv_values[i] = 0; ++ _dl_parse_auxv (av, auxv_values); + } + #endif + +diff --git a/sysdeps/unix/sysv/linux/alpha/dl-auxv.h b/sysdeps/unix/sysv/linux/alpha/dl-auxv.h +index 1aa9dca80d189ebe..8c99e776a0af9cef 100644 +--- a/sysdeps/unix/sysv/linux/alpha/dl-auxv.h ++++ b/sysdeps/unix/sysv/linux/alpha/dl-auxv.h +@@ -20,16 +20,8 @@ + + extern long __libc_alpha_cache_shape[4]; + +-#define DL_PLATFORM_AUXV \ +- case AT_L1I_CACHESHAPE: \ +- __libc_alpha_cache_shape[0] = av->a_un.a_val; \ +- break; \ +- case AT_L1D_CACHESHAPE: \ +- __libc_alpha_cache_shape[1] = av->a_un.a_val; \ +- break; \ +- case AT_L2_CACHESHAPE: \ +- __libc_alpha_cache_shape[2] = av->a_un.a_val; \ +- break; \ +- case AT_L3_CACHESHAPE: \ +- __libc_alpha_cache_shape[3] = av->a_un.a_val; \ +- break; ++#define DL_PLATFORM_AUXV \ ++ __libc_alpha_cache_shape[0] = auxv_values[AT_L1I_CACHESHAPE]; \ ++ __libc_alpha_cache_shape[1] = auxv_values[AT_L1D_CACHESHAPE]; \ ++ __libc_alpha_cache_shape[2] = auxv_values[AT_L2_CACHESHAPE]; \ ++ __libc_alpha_cache_shape[3] = auxv_values[AT_L3_CACHESHAPE]; +diff --git a/sysdeps/unix/sysv/linux/dl-parse_auxv.h b/sysdeps/unix/sysv/linux/dl-parse_auxv.h +new file mode 100644 +index 0000000000000000..bf9374371eb217fc +--- /dev/null ++++ b/sysdeps/unix/sysv/linux/dl-parse_auxv.h +@@ -0,0 +1,61 @@ ++/* Parse the Linux auxiliary vector. 
++ Copyright (C) 1995-2022 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++#include ++#include ++#include ++#include ++ ++typedef ElfW(Addr) dl_parse_auxv_t[AT_MINSIGSTKSZ + 1]; ++ ++/* Copy the auxiliary vector into AUXV_VALUES and set up GLRO ++ variables. */ ++static inline ++void _dl_parse_auxv (ElfW(auxv_t) *av, dl_parse_auxv_t auxv_values) ++{ ++ auxv_values[AT_ENTRY] = (ElfW(Addr)) ENTRY_POINT; ++ auxv_values[AT_PAGESZ] = EXEC_PAGESIZE; ++ auxv_values[AT_FPUCW] = _FPU_DEFAULT; ++ ++ /* NB: Default to a constant CONSTANT_MINSIGSTKSZ. */ ++ _Static_assert (__builtin_constant_p (CONSTANT_MINSIGSTKSZ), ++ "CONSTANT_MINSIGSTKSZ is constant"); ++ auxv_values[AT_MINSIGSTKSZ] = CONSTANT_MINSIGSTKSZ; ++ ++ for (; av->a_type != AT_NULL; av++) ++ if (av->a_type <= AT_MINSIGSTKSZ) ++ auxv_values[av->a_type] = av->a_un.a_val; ++ ++ GLRO(dl_pagesize) = auxv_values[AT_PAGESZ]; ++ __libc_enable_secure = auxv_values[AT_SECURE]; ++ GLRO(dl_platform) = (void *) auxv_values[AT_PLATFORM]; ++ GLRO(dl_hwcap) = auxv_values[AT_HWCAP]; ++ GLRO(dl_hwcap2) = auxv_values[AT_HWCAP2]; ++ GLRO(dl_clktck) = auxv_values[AT_CLKTCK]; ++ GLRO(dl_fpu_control) = auxv_values[AT_FPUCW]; ++ _dl_random = (void *) auxv_values[AT_RANDOM]; ++ GLRO(dl_minsigstacksize) = auxv_values[AT_MINSIGSTKSZ]; ++ GLRO(dl_sysinfo_dso) = (void *) auxv_values[AT_SYSINFO_EHDR]; ++#ifdef NEED_DL_SYSINFO ++ if (GLRO(dl_sysinfo_dso) != NULL) ++ GLRO(dl_sysinfo) = auxv_values[AT_SYSINFO]; ++#endif ++ ++ DL_PLATFORM_AUXV ++} +diff --git a/sysdeps/unix/sysv/linux/dl-sysdep.c b/sysdeps/unix/sysv/linux/dl-sysdep.c +index 3487976b06ad7f58..56db828fc6985de6 100644 +--- a/sysdeps/unix/sysv/linux/dl-sysdep.c ++++ b/sysdeps/unix/sysv/linux/dl-sysdep.c +@@ -18,15 +18,14 @@ + + #include <_itoa.h> + #include +-#include ++#include + #include ++#include + #include + #include + #include +-#include + #include + #include +-#include + #include + #include + #include +@@ -43,10 +42,9 @@ + #include + + #include ++#include + + #ifdef SHARED +-# include +- + extern char **_environ attribute_hidden; + extern char _end[] attribute_hidden; + +@@ -64,20 +62,20 @@ void *_dl_random attribute_relro = NULL; + # define DL_STACK_END(cookie) ((void *) (cookie)) + #endif + +-ElfW(Addr) +-_dl_sysdep_start (void **start_argptr, +- void (*dl_main) (const ElfW(Phdr) *phdr, ElfW(Word) phnum, +- ElfW(Addr) *user_entry, ElfW(auxv_t) *auxv)) ++/* Arguments passed to dl_main. */ ++struct dl_main_arguments + { +- const ElfW(Phdr) *phdr = NULL; +- ElfW(Word) phnum = 0; ++ const ElfW(Phdr) *phdr; ++ ElfW(Word) phnum; + ElfW(Addr) user_entry; +- ElfW(auxv_t) *av; +-#ifdef NEED_DL_SYSINFO +- uintptr_t new_sysinfo = 0; +-#endif ++}; + +- __libc_stack_end = DL_STACK_END (start_argptr); ++/* Separate function, so that dl_main can be called without the large ++ array on the stack. 
*/ ++static void ++_dl_sysdep_parse_arguments (void **start_argptr, ++ struct dl_main_arguments *args) ++{ + _dl_argc = (intptr_t) *start_argptr; + _dl_argv = (char **) (start_argptr + 1); /* Necessary aliasing violation. */ + _environ = _dl_argv + _dl_argc + 1; +@@ -89,74 +87,25 @@ _dl_sysdep_start (void **start_argptr, + break; + } + +- user_entry = (ElfW(Addr)) ENTRY_POINT; +- GLRO(dl_platform) = NULL; /* Default to nothing known about the platform. */ ++ dl_parse_auxv_t auxv_values = { 0, }; ++ _dl_parse_auxv (GLRO(dl_auxv), auxv_values); + +- /* NB: Default to a constant CONSTANT_MINSIGSTKSZ. */ +- _Static_assert (__builtin_constant_p (CONSTANT_MINSIGSTKSZ), +- "CONSTANT_MINSIGSTKSZ is constant"); +- GLRO(dl_minsigstacksize) = CONSTANT_MINSIGSTKSZ; ++ args->phdr = (const ElfW(Phdr) *) auxv_values[AT_PHDR]; ++ args->phnum = auxv_values[AT_PHNUM]; ++ args->user_entry = auxv_values[AT_ENTRY]; ++} + +- for (av = GLRO(dl_auxv); av->a_type != AT_NULL; av++) +- switch (av->a_type) +- { +- case AT_PHDR: +- phdr = (void *) av->a_un.a_val; +- break; +- case AT_PHNUM: +- phnum = av->a_un.a_val; +- break; +- case AT_PAGESZ: +- GLRO(dl_pagesize) = av->a_un.a_val; +- break; +- case AT_ENTRY: +- user_entry = av->a_un.a_val; +- break; +- case AT_SECURE: +- __libc_enable_secure = av->a_un.a_val; +- break; +- case AT_PLATFORM: +- GLRO(dl_platform) = (void *) av->a_un.a_val; +- break; +- case AT_HWCAP: +- GLRO(dl_hwcap) = (unsigned long int) av->a_un.a_val; +- break; +- case AT_HWCAP2: +- GLRO(dl_hwcap2) = (unsigned long int) av->a_un.a_val; +- break; +- case AT_CLKTCK: +- GLRO(dl_clktck) = av->a_un.a_val; +- break; +- case AT_FPUCW: +- GLRO(dl_fpu_control) = av->a_un.a_val; +- break; +-#ifdef NEED_DL_SYSINFO +- case AT_SYSINFO: +- new_sysinfo = av->a_un.a_val; +- break; +-#endif +- case AT_SYSINFO_EHDR: +- GLRO(dl_sysinfo_dso) = (void *) av->a_un.a_val; +- break; +- case AT_RANDOM: +- _dl_random = (void *) av->a_un.a_val; +- break; +- case AT_MINSIGSTKSZ: +- GLRO(dl_minsigstacksize) = av->a_un.a_val; +- break; +- DL_PLATFORM_AUXV +- } ++ElfW(Addr) ++_dl_sysdep_start (void **start_argptr, ++ void (*dl_main) (const ElfW(Phdr) *phdr, ElfW(Word) phnum, ++ ElfW(Addr) *user_entry, ElfW(auxv_t) *auxv)) ++{ ++ __libc_stack_end = DL_STACK_END (start_argptr); + +- dl_hwcap_check (); ++ struct dl_main_arguments dl_main_args; ++ _dl_sysdep_parse_arguments (start_argptr, &dl_main_args); + +-#ifdef NEED_DL_SYSINFO +- if (new_sysinfo != 0) +- { +- /* Only set the sysinfo value if we also have the vsyscall DSO. */ +- if (GLRO(dl_sysinfo_dso) != 0) +- GLRO(dl_sysinfo) = new_sysinfo; +- } +-#endif ++ dl_hwcap_check (); + + __tunables_init (_environ); + +@@ -188,8 +137,9 @@ _dl_sysdep_start (void **start_argptr, + if (__builtin_expect (__libc_enable_secure, 0)) + __libc_check_standard_fds (); + +- (*dl_main) (phdr, phnum, &user_entry, GLRO(dl_auxv)); +- return user_entry; ++ (*dl_main) (dl_main_args.phdr, dl_main_args.phnum, ++ &dl_main_args.user_entry, GLRO(dl_auxv)); ++ return dl_main_args.user_entry; + } + + void +diff --git a/sysdeps/unix/sysv/linux/powerpc/dl-auxv.h b/sysdeps/unix/sysv/linux/powerpc/dl-auxv.h +index 36ba0f3e9e45f3e2..7f35fb531ba22098 100644 +--- a/sysdeps/unix/sysv/linux/powerpc/dl-auxv.h ++++ b/sysdeps/unix/sysv/linux/powerpc/dl-auxv.h +@@ -16,15 +16,5 @@ + License along with the GNU C Library; if not, see + . 
*/ + +-#include +- +-#if IS_IN (libc) && !defined SHARED +-int GLRO(dl_cache_line_size); +-#endif +- +-/* Scan the Aux Vector for the "Data Cache Block Size" entry and assign it +- to dl_cache_line_size. */ +-#define DL_PLATFORM_AUXV \ +- case AT_DCACHEBSIZE: \ +- GLRO(dl_cache_line_size) = av->a_un.a_val; \ +- break; ++#define DL_PLATFORM_AUXV \ ++ GLRO(dl_cache_line_size) = auxv_values[AT_DCACHEBSIZE]; +diff --git a/sysdeps/unix/sysv/linux/powerpc/dl-support.c b/sysdeps/unix/sysv/linux/powerpc/dl-support.c +new file mode 100644 +index 0000000000000000..abe68a704946b90f +--- /dev/null ++++ b/sysdeps/unix/sysv/linux/powerpc/dl-support.c +@@ -0,0 +1,4 @@ ++#include ++ ++/* Populated from the auxiliary vector. */ ++int _dl_cache_line_size; diff --git a/SOURCES/glibc-upstream-2.34-245.patch b/SOURCES/glibc-upstream-2.34-245.patch new file mode 100644 index 0000000..5ba00fd --- /dev/null +++ b/SOURCES/glibc-upstream-2.34-245.patch @@ -0,0 +1,197 @@ +commit be9240c84c67de44959905a829141576965a0588 +Author: Fangrui Song +Date: Tue Apr 19 15:52:27 2022 -0700 + + elf: Remove __libc_init_secure + + After 73fc4e28b9464f0e13edc719a5372839970e7ddb, + __libc_enable_secure_decided is always 0 and a statically linked + executable may overwrite __libc_enable_secure without considering + AT_SECURE. + + The __libc_enable_secure has been correctly initialized in _dl_aux_init, + so just remove __libc_enable_secure_decided and __libc_init_secure. + This allows us to remove some startup_get*id functions from + 22b79ed7f413cd980a7af0cf258da5bf82b6d5e5. + + Reviewed-by: Florian Weimer + (cherry picked from commit 3e9acce8c50883b6cd8a3fb653363d9fa21e1608) + +diff --git a/csu/libc-start.c b/csu/libc-start.c +index d01e57ea59ceb880..a2fc2f6f9665a48f 100644 +--- a/csu/libc-start.c ++++ b/csu/libc-start.c +@@ -285,9 +285,6 @@ LIBC_START_MAIN (int (*main) (int, char **, char ** MAIN_AUXVEC_DECL), + } + } + +- /* Initialize very early so that tunables can use it. */ +- __libc_init_secure (); +- + __tunables_init (__environ); + + ARCH_INIT_CPU_FEATURES (); +diff --git a/elf/enbl-secure.c b/elf/enbl-secure.c +index 9e47526bd3e444e1..1208610bd0670c74 100644 +--- a/elf/enbl-secure.c ++++ b/elf/enbl-secure.c +@@ -26,15 +26,5 @@ + #include + #include + +-/* If nonzero __libc_enable_secure is already set. */ +-int __libc_enable_secure_decided; + /* Safest assumption, if somehow the initializer isn't run. */ + int __libc_enable_secure = 1; +- +-void +-__libc_init_secure (void) +-{ +- if (__libc_enable_secure_decided == 0) +- __libc_enable_secure = (startup_geteuid () != startup_getuid () +- || startup_getegid () != startup_getgid ()); +-} +diff --git a/include/libc-internal.h b/include/libc-internal.h +index 749dfb919ce4a62d..44fcb6bdf8751c1c 100644 +--- a/include/libc-internal.h ++++ b/include/libc-internal.h +@@ -21,9 +21,6 @@ + + #include + +-/* Initialize the `__libc_enable_secure' flag. */ +-extern void __libc_init_secure (void); +- + /* Discover the tick frequency of the machine if something goes wrong, + we return 0, an impossible hertz. */ + extern int __profile_frequency (void); +diff --git a/include/unistd.h b/include/unistd.h +index 7849562c4272e2c9..5824485629793ccb 100644 +--- a/include/unistd.h ++++ b/include/unistd.h +@@ -180,7 +180,6 @@ libc_hidden_proto (__sbrk) + and some functions contained in the C library ignore various + environment variables that normally affect them. 
*/ + extern int __libc_enable_secure attribute_relro; +-extern int __libc_enable_secure_decided; + rtld_hidden_proto (__libc_enable_secure) + + +diff --git a/sysdeps/generic/startup.h b/sysdeps/generic/startup.h +index 04f20cde474cea89..c3be5430bd8bbaa6 100644 +--- a/sysdeps/generic/startup.h ++++ b/sysdeps/generic/startup.h +@@ -23,27 +23,3 @@ + + /* Use macro instead of inline function to avoid including . */ + #define _startup_fatal(message) __libc_fatal ((message)) +- +-static inline uid_t +-startup_getuid (void) +-{ +- return __getuid (); +-} +- +-static inline uid_t +-startup_geteuid (void) +-{ +- return __geteuid (); +-} +- +-static inline gid_t +-startup_getgid (void) +-{ +- return __getgid (); +-} +- +-static inline gid_t +-startup_getegid (void) +-{ +- return __getegid (); +-} +diff --git a/sysdeps/mach/hurd/enbl-secure.c b/sysdeps/mach/hurd/enbl-secure.c +deleted file mode 100644 +index 3e9a6b888d56754b..0000000000000000 +--- a/sysdeps/mach/hurd/enbl-secure.c ++++ /dev/null +@@ -1,30 +0,0 @@ +-/* Define and initialize the `__libc_enable_secure' flag. Hurd version. +- Copyright (C) 1998-2021 Free Software Foundation, Inc. +- This file is part of the GNU C Library. +- +- The GNU C Library is free software; you can redistribute it and/or +- modify it under the terms of the GNU Lesser General Public +- License as published by the Free Software Foundation; either +- version 2.1 of the License, or (at your option) any later version. +- +- The GNU C Library is distributed in the hope that it will be useful, +- but WITHOUT ANY WARRANTY; without even the implied warranty of +- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +- Lesser General Public License for more details. +- +- You should have received a copy of the GNU Lesser General Public +- License along with the GNU C Library; if not, see +- . */ +- +-/* There is no need for this file in the Hurd; it is just a placeholder +- to prevent inclusion of the sysdeps/generic version. +- In the shared library, the `__libc_enable_secure' variable is defined +- by the dynamic linker in dl-sysdep.c and set there. +- In the static library, it is defined in init-first.c and set there. 
*/ +- +-#include +- +-void +-__libc_init_secure (void) +-{ +-} +diff --git a/sysdeps/mach/hurd/i386/init-first.c b/sysdeps/mach/hurd/i386/init-first.c +index a430aae085527163..4dc9017ec8754a1a 100644 +--- a/sysdeps/mach/hurd/i386/init-first.c ++++ b/sysdeps/mach/hurd/i386/init-first.c +@@ -38,10 +38,6 @@ extern void __init_misc (int, char **, char **); + unsigned long int __hurd_threadvar_stack_offset; + unsigned long int __hurd_threadvar_stack_mask; + +-#ifndef SHARED +-int __libc_enable_secure; +-#endif +- + extern int __libc_argc attribute_hidden; + extern char **__libc_argv attribute_hidden; + extern char **_dl_argv; +diff --git a/sysdeps/unix/sysv/linux/i386/startup.h b/sysdeps/unix/sysv/linux/i386/startup.h +index dee7a4f1d3d420be..192c765361c17ed1 100644 +--- a/sysdeps/unix/sysv/linux/i386/startup.h ++++ b/sysdeps/unix/sysv/linux/i386/startup.h +@@ -32,30 +32,6 @@ _startup_fatal (const char *message __attribute__ ((unused))) + ABORT_INSTRUCTION; + __builtin_unreachable (); + } +- +-static inline uid_t +-startup_getuid (void) +-{ +- return (uid_t) INTERNAL_SYSCALL_CALL (getuid32); +-} +- +-static inline uid_t +-startup_geteuid (void) +-{ +- return (uid_t) INTERNAL_SYSCALL_CALL (geteuid32); +-} +- +-static inline gid_t +-startup_getgid (void) +-{ +- return (gid_t) INTERNAL_SYSCALL_CALL (getgid32); +-} +- +-static inline gid_t +-startup_getegid (void) +-{ +- return (gid_t) INTERNAL_SYSCALL_CALL (getegid32); +-} + #else + # include_next + #endif diff --git a/SOURCES/glibc-upstream-2.34-246.patch b/SOURCES/glibc-upstream-2.34-246.patch new file mode 100644 index 0000000..76c7b68 --- /dev/null +++ b/SOURCES/glibc-upstream-2.34-246.patch @@ -0,0 +1,31 @@ +commit 1e7b011f87c653ad109b34e675f64e7a5cc3805a +Author: Florian Weimer +Date: Wed May 4 15:37:21 2022 +0200 + + i386: Remove OPTIMIZE_FOR_GCC_5 from Linux libc-do-syscall.S + + After commit a78e6a10d0b50d0ca80309775980fc99944b1727 + ("i386: Remove broken CAN_USE_REGISTER_ASM_EBP (bug 28771)"), + it is never defined. + + Reviewed-by: H.J. Lu + (cherry picked from commit 6e5c7a1e262961adb52443ab91bd2c9b72316402) + +diff --git a/sysdeps/unix/sysv/linux/i386/libc-do-syscall.S b/sysdeps/unix/sysv/linux/i386/libc-do-syscall.S +index c95f297d6f0217ef..404435f0123b23b3 100644 +--- a/sysdeps/unix/sysv/linux/i386/libc-do-syscall.S ++++ b/sysdeps/unix/sysv/linux/i386/libc-do-syscall.S +@@ -18,8 +18,6 @@ + + #include + +-#ifndef OPTIMIZE_FOR_GCC_5 +- + /* %eax, %ecx, %edx and %esi contain the values expected by the kernel. + %edi points to a structure with the values of %ebx, %edi and %ebp. */ + +@@ -50,4 +48,3 @@ ENTRY (__libc_do_syscall) + cfi_restore (ebx) + ret + END (__libc_do_syscall) +-#endif diff --git a/SOURCES/glibc-upstream-2.34-247.patch b/SOURCES/glibc-upstream-2.34-247.patch new file mode 100644 index 0000000..c6b2961 --- /dev/null +++ b/SOURCES/glibc-upstream-2.34-247.patch @@ -0,0 +1,94 @@ +commit 1a5b9d1a231ae788aac3520dab07dc856e404c69 +Author: Florian Weimer +Date: Wed May 4 15:37:21 2022 +0200 + + i386: Honor I386_USE_SYSENTER for 6-argument Linux system calls + + Introduce an int-80h-based version of __libc_do_syscall and use + it if I386_USE_SYSENTER is defined as 0. + + Reviewed-by: H.J. 
Lu <hjl.tools@gmail.com>
+    (cherry picked from commit 60f0f2130d30cfd008ca39743027f1e200592dff)
+
+diff --git a/sysdeps/unix/sysv/linux/i386/Makefile b/sysdeps/unix/sysv/linux/i386/Makefile
+index abd0009d58f06303..e379a2e767d96322 100644
+--- a/sysdeps/unix/sysv/linux/i386/Makefile
++++ b/sysdeps/unix/sysv/linux/i386/Makefile
+@@ -14,7 +14,7 @@ install-bin += lddlibc4
+ endif
+ 
+ ifeq ($(subdir),io)
+-sysdep_routines += libc-do-syscall
++sysdep_routines += libc-do-syscall libc-do-syscall-int80
+ endif
+ 
+ ifeq ($(subdir),stdlib)
+diff --git a/sysdeps/unix/sysv/linux/i386/libc-do-syscall-int80.S b/sysdeps/unix/sysv/linux/i386/libc-do-syscall-int80.S
+new file mode 100644
+index 0000000000000000..2c472f255734b357
+--- /dev/null
++++ b/sysdeps/unix/sysv/linux/i386/libc-do-syscall-int80.S
+@@ -0,0 +1,25 @@
++/* Out-of-line syscall stub for six-argument syscalls from C.  For static PIE.
++   Copyright (C) 2022 Free Software Foundation, Inc.
++   This file is part of the GNU C Library.
++
++   The GNU C Library is free software; you can redistribute it and/or
++   modify it under the terms of the GNU Lesser General Public
++   License as published by the Free Software Foundation; either
++   version 2.1 of the License, or (at your option) any later version.
++
++   The GNU C Library is distributed in the hope that it will be useful,
++   but WITHOUT ANY WARRANTY; without even the implied warranty of
++   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++   Lesser General Public License for more details.
++
++   You should have received a copy of the GNU Lesser General Public
++   License along with the GNU C Library; if not, see
++   <https://www.gnu.org/licenses/>.  */
++
++#ifndef SHARED
++# define I386_USE_SYSENTER 0
++# include <sysdep.h>
++
++# define __libc_do_syscall __libc_do_syscall_int80
++# include "libc-do-syscall.S"
++#endif
+diff --git a/sysdeps/unix/sysv/linux/i386/sysdep.h b/sysdeps/unix/sysv/linux/i386/sysdep.h
+index 39d6a3c13427abb5..4c6358c7fe43fe0b 100644
+--- a/sysdeps/unix/sysv/linux/i386/sysdep.h
++++ b/sysdeps/unix/sysv/linux/i386/sysdep.h
+@@ -43,6 +43,15 @@
+ # endif
+ #endif
+ 
++#if !I386_USE_SYSENTER && IS_IN (libc) && !defined SHARED
++/* Inside static libc, we have two versions.  For compilation units
++   with !I386_USE_SYSENTER, the vDSO entry mechanism cannot be
++   used. */
++# define I386_DO_SYSCALL_STRING "__libc_do_syscall_int80"
++#else
++# define I386_DO_SYSCALL_STRING "__libc_do_syscall"
++#endif
++
+ #ifdef __ASSEMBLER__
+ 
+ /* Linux uses a negative return value to indicate syscall errors,
+@@ -302,7 +311,7 @@ struct libc_do_syscall_args
+     };									\
+     asm volatile (							\
+     "movl %1, %%eax\n\t"						\
+-    "call __libc_do_syscall"						\
++    "call " I386_DO_SYSCALL_STRING					\
+     : "=a" (resultvar)							\
+     : "i" (__NR_##name), "c" (arg2), "d" (arg3), "S" (arg4), "D" (&_xv) \
+     : "memory", "cc")
+@@ -316,7 +325,7 @@ struct libc_do_syscall_args
+     };									\
+     asm volatile (							\
+     "movl %1, %%eax\n\t"						\
+-    "call __libc_do_syscall"						\
++    "call " I386_DO_SYSCALL_STRING					\
+     : "=a" (resultvar)							\
+     : "a" (name), "c" (arg2), "d" (arg3), "S" (arg4), "D" (&_xv)	\
+     : "memory", "cc")
diff --git a/SOURCES/glibc-upstream-2.34-248.patch b/SOURCES/glibc-upstream-2.34-248.patch
new file mode 100644
index 0000000..dda3e73
--- /dev/null
+++ b/SOURCES/glibc-upstream-2.34-248.patch
@@ -0,0 +1,93 @@
+commit b38c9cdb58061d357cdf9bca4f6967d487becb82
+Author: Florian Weimer <fweimer@redhat.com>
+Date:   Wed May 4 15:37:21 2022 +0200
+
+    Linux: Define MMAP_CALL_INTERNAL
+
+    Unlike MMAP_CALL, this avoids a TCB dependency for an errno update
+    on failure. 
+
+    <mmap_internal.h> cannot be included as is on several architectures
+    due to the definition of page_unit, so introduce a separate header
+    file for the definition of MMAP_CALL and MMAP_CALL_INTERNAL,
+    <mmap_call.h>.
+
+    Reviewed-by: Stefan Liebler <stli@linux.ibm.com>
+    (cherry picked from commit c1b68685d438373efe64e5f076f4215723004dfb)
+
+diff --git a/sysdeps/unix/sysv/linux/mmap_call.h b/sysdeps/unix/sysv/linux/mmap_call.h
+new file mode 100644
+index 0000000000000000..3547c99e149e5064
+--- /dev/null
++++ b/sysdeps/unix/sysv/linux/mmap_call.h
+@@ -0,0 +1,22 @@
++/* Generic definition of MMAP_CALL and MMAP_CALL_INTERNAL.
++   Copyright (C) 2017-2022 Free Software Foundation, Inc.
++   This file is part of the GNU C Library.
++
++   The GNU C Library is free software; you can redistribute it and/or
++   modify it under the terms of the GNU Lesser General Public
++   License as published by the Free Software Foundation; either
++   version 2.1 of the License, or (at your option) any later version.
++
++   The GNU C Library is distributed in the hope that it will be useful,
++   but WITHOUT ANY WARRANTY; without even the implied warranty of
++   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++   Lesser General Public License for more details.
++
++   You should have received a copy of the GNU Lesser General Public
++   License along with the GNU C Library; if not, see
++   <https://www.gnu.org/licenses/>.  */
++
++#define MMAP_CALL(__nr, __addr, __len, __prot, __flags, __fd, __offset) \
++  INLINE_SYSCALL_CALL (__nr, __addr, __len, __prot, __flags, __fd, __offset)
++#define MMAP_CALL_INTERNAL(__nr, __addr, __len, __prot, __flags, __fd, __offset) \
++  INTERNAL_SYSCALL_CALL (__nr, __addr, __len, __prot, __flags, __fd, __offset)
+diff --git a/sysdeps/unix/sysv/linux/mmap_internal.h b/sysdeps/unix/sysv/linux/mmap_internal.h
+index 5ca6976191137f95..989eb0c7c6b57dc1 100644
+--- a/sysdeps/unix/sysv/linux/mmap_internal.h
++++ b/sysdeps/unix/sysv/linux/mmap_internal.h
+@@ -40,10 +40,6 @@ static uint64_t page_unit;
+ /* Do not accept offset not multiple of page size.  */
+ #define MMAP_OFF_LOW_MASK  (MMAP2_PAGE_UNIT - 1)
+ 
+-/* An architecture may override this.  */
+-#ifndef MMAP_CALL
+-# define MMAP_CALL(__nr, __addr, __len, __prot, __flags, __fd, __offset) \
+-  INLINE_SYSCALL_CALL (__nr, __addr, __len, __prot, __flags, __fd, __offset)
+-#endif
++#include <mmap_call.h>
+ 
+ #endif /* MMAP_INTERNAL_LINUX_H */
+diff --git a/sysdeps/unix/sysv/linux/s390/mmap_internal.h b/sysdeps/unix/sysv/linux/s390/mmap_call.h
+similarity index 78%
+rename from sysdeps/unix/sysv/linux/s390/mmap_internal.h
+rename to sysdeps/unix/sysv/linux/s390/mmap_call.h
+index 46f1c3769d6b586a..bdd30cc83764c2c1 100644
+--- a/sysdeps/unix/sysv/linux/s390/mmap_internal.h
++++ b/sysdeps/unix/sysv/linux/s390/mmap_call.h
+@@ -16,9 +16,6 @@
+    License along with the GNU C Library; if not, see
+    <https://www.gnu.org/licenses/>.  
*/
+ 
+-#ifndef MMAP_S390_INTERNAL_H
+-# define MMAP_S390_INTERNAL_H
+-
+ #define MMAP_CALL(__nr, __addr, __len, __prot, __flags, __fd, __offset) \
+   ({									\
+     long int __args[6] = { (long int) (__addr), (long int) (__len),	\
+			   (long int) (__prot), (long int) (__flags),	\
+			   (long int) (__fd), (long int) (__offset) };	\
+     INLINE_SYSCALL_CALL (__nr, __args);				\
+   })
+-
+-#include_next <mmap_internal.h>
+-
+-#endif
++#define MMAP_CALL_INTERNAL(__nr, __addr, __len, __prot, __flags, __fd, __offset) \
++  ({									\
++    long int __args[6] = { (long int) (__addr), (long int) (__len),	\
++			   (long int) (__prot), (long int) (__flags),	\
++			   (long int) (__fd), (long int) (__offset) };	\
++    INTERNAL_SYSCALL_CALL (__nr, __args);				\
++  })
diff --git a/SOURCES/glibc-upstream-2.34-249.patch b/SOURCES/glibc-upstream-2.34-249.patch
new file mode 100644
index 0000000..7b48d3f
--- /dev/null
+++ b/SOURCES/glibc-upstream-2.34-249.patch
@@ -0,0 +1,88 @@
+commit b2387bea84560d286613257139aba6787f414594
+Author: Florian Weimer <fweimer@redhat.com>
+Date:   Mon May 9 18:15:16 2022 +0200
+
+    ia64: Always define IA64_USE_NEW_STUB as a flag macro
+
+    And keep the previous definition if it exists.  This allows
+    disabling IA64_USE_NEW_STUB while keeping USE_DL_SYSINFO defined.
+
+    Reviewed-by: Adhemerval Zanella <adhemerval.zanella@linaro.org>
+    (cherry picked from commit 18bd9c3d3b1b6a9182698c85354578d1d58e9d64)
+
+diff --git a/sysdeps/unix/sysv/linux/ia64/brk.c b/sysdeps/unix/sysv/linux/ia64/brk.c
+index cf2c5bd667fb4432..61d8fa260eb59d1e 100644
+--- a/sysdeps/unix/sysv/linux/ia64/brk.c
++++ b/sysdeps/unix/sysv/linux/ia64/brk.c
+@@ -16,7 +16,6 @@
+    License along with the GNU C Library; if not, see
+    <https://www.gnu.org/licenses/>.  */
+ 
+-#include
+-/* brk is used by statup before TCB is properly set.  */
+-#undef USE_DL_SYSINFO
++/* brk is used by startup before TCB is properly set up.  */
++#define IA64_USE_NEW_STUB 0
+ #include
+diff --git a/sysdeps/unix/sysv/linux/ia64/sysdep.h b/sysdeps/unix/sysv/linux/ia64/sysdep.h
+index 7198c192a03b7676..f1c81a66833941cc 100644
+--- a/sysdeps/unix/sysv/linux/ia64/sysdep.h
++++ b/sysdeps/unix/sysv/linux/ia64/sysdep.h
+@@ -46,12 +46,15 @@
+ #undef SYS_ify
+ #define SYS_ify(syscall_name)	__NR_##syscall_name
+ 
+-#if defined USE_DL_SYSINFO \
+-	&& (IS_IN (libc) \
+-	    || IS_IN (libpthread) || IS_IN (librt))
+-# define IA64_USE_NEW_STUB
+-#else
+-# undef IA64_USE_NEW_STUB
++#ifndef IA64_USE_NEW_STUB
++# if defined USE_DL_SYSINFO && IS_IN (libc)
++#  define IA64_USE_NEW_STUB 1
++# else
++#  define IA64_USE_NEW_STUB 0
++# endif
++#endif
++#if IA64_USE_NEW_STUB && !USE_DL_SYSINFO
++# error IA64_USE_NEW_STUB needs USE_DL_SYSINFO
+ #endif
+ 
+ #ifdef __ASSEMBLER__
+@@ -103,7 +106,7 @@
+ 	mov r15=num;				\
+ 	break __IA64_BREAK_SYSCALL
+ 
+-#ifdef IA64_USE_NEW_STUB
++#if IA64_USE_NEW_STUB
+ # ifdef SHARED
+ #  define DO_CALL(num)				\
+ 	.prologue;				\
+@@ -187,7 +190,7 @@
+    (non-negative) errno on error or the return value on success.
+  */
+ 
+-#ifdef IA64_USE_NEW_STUB
++#if IA64_USE_NEW_STUB
+ 
+ # define INTERNAL_SYSCALL_NCS(name, nr, args...)	\
+ ({							\
+@@ -279,7 +282,7 @@
+ #define ASM_OUTARGS_5	ASM_OUTARGS_4, "=r" (_out4)
+ #define ASM_OUTARGS_6	ASM_OUTARGS_5, "=r" (_out5)
+ 
+-#ifdef IA64_USE_NEW_STUB
++#if IA64_USE_NEW_STUB
+ #define ASM_ARGS_0
+ #define ASM_ARGS_1	ASM_ARGS_0, "4" (_out0)
+ #define ASM_ARGS_2	ASM_ARGS_1, "5" (_out1)
+@@ -315,7 +318,7 @@
+    /* Branch registers.  
*/ \ + "b6" + +-#ifdef IA64_USE_NEW_STUB ++#if IA64_USE_NEW_STUB + # define ASM_CLOBBERS_6 ASM_CLOBBERS_6_COMMON + #else + # define ASM_CLOBBERS_6 ASM_CLOBBERS_6_COMMON , "b7" diff --git a/SOURCES/glibc-upstream-2.34-250.patch b/SOURCES/glibc-upstream-2.34-250.patch new file mode 100644 index 0000000..f552acc --- /dev/null +++ b/SOURCES/glibc-upstream-2.34-250.patch @@ -0,0 +1,121 @@ +commit e7ca2a475cf2e7ffc987b8d08e1a40337840b500 +Author: Florian Weimer +Date: Mon May 9 18:15:16 2022 +0200 + + Linux: Implement a useful version of _startup_fatal + + On i386 and ia64, the TCB is not available at this point. + + Reviewed-by: Adhemerval Zanella + (cherry picked from commit a2a6bce7d7e52c1c34369a7da62c501cc350bc31) + +diff --git a/sysdeps/unix/sysv/linux/i386/startup.h b/sysdeps/unix/sysv/linux/i386/startup.h +index 192c765361c17ed1..213805d7d2d459be 100644 +--- a/sysdeps/unix/sysv/linux/i386/startup.h ++++ b/sysdeps/unix/sysv/linux/i386/startup.h +@@ -1,5 +1,5 @@ + /* Linux/i386 definitions of functions used by static libc main startup. +- Copyright (C) 2017-2021 Free Software Foundation, Inc. ++ Copyright (C) 2022 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or +@@ -16,22 +16,7 @@ + License along with the GNU C Library; if not, see + . */ + +-#if BUILD_PIE_DEFAULT +-/* Can't use "call *%gs:SYSINFO_OFFSET" during statup in static PIE. */ +-# define I386_USE_SYSENTER 0 ++/* Can't use "call *%gs:SYSINFO_OFFSET" during startup. */ ++#define I386_USE_SYSENTER 0 + +-# include +-# include +- +-__attribute__ ((__noreturn__)) +-static inline void +-_startup_fatal (const char *message __attribute__ ((unused))) +-{ +- /* This is only called very early during startup in static PIE. +- FIXME: How can it be improved? */ +- ABORT_INSTRUCTION; +- __builtin_unreachable (); +-} +-#else +-# include_next +-#endif ++#include_next +diff --git a/sysdeps/unix/sysv/linux/ia64/startup.h b/sysdeps/unix/sysv/linux/ia64/startup.h +new file mode 100644 +index 0000000000000000..77f29f15a2103ed5 +--- /dev/null ++++ b/sysdeps/unix/sysv/linux/ia64/startup.h +@@ -0,0 +1,22 @@ ++/* Linux/ia64 definitions of functions used by static libc main startup. ++ Copyright (C) 2022 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++/* This code is used before the TCB is set up. */ ++#define IA64_USE_NEW_STUB 0 ++ ++#include_next +diff --git a/sysdeps/unix/sysv/linux/startup.h b/sysdeps/unix/sysv/linux/startup.h +new file mode 100644 +index 0000000000000000..39859b404a84798b +--- /dev/null ++++ b/sysdeps/unix/sysv/linux/startup.h +@@ -0,0 +1,39 @@ ++/* Linux definitions of functions used by static libc main startup. ++ Copyright (C) 2017-2022 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. 
++
++   The GNU C Library is free software; you can redistribute it and/or
++   modify it under the terms of the GNU Lesser General Public
++   License as published by the Free Software Foundation; either
++   version 2.1 of the License, or (at your option) any later version.
++
++   The GNU C Library is distributed in the hope that it will be useful,
++   but WITHOUT ANY WARRANTY; without even the implied warranty of
++   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++   Lesser General Public License for more details.
++
++   You should have received a copy of the GNU Lesser General Public
++   License along with the GNU C Library; if not, see
++   <https://www.gnu.org/licenses/>.  */
++
++#ifdef SHARED
++# include_next <startup.h>
++#else
++# include <sysdep.h>
++
++/* Avoid a run-time invocation of strlen.  */
++#define _startup_fatal(message)                                         \
++  do                                                                    \
++    {                                                                   \
++      size_t __message_length = __builtin_strlen (message);             \
++      if (! __builtin_constant_p (__message_length))                    \
++        {                                                               \
++          extern void _startup_fatal_not_constant (void);               \
++          _startup_fatal_not_constant ();                               \
++        }                                                               \
++      INTERNAL_SYSCALL_CALL (write, STDERR_FILENO, (message),           \
++                             __message_length);                         \
++      INTERNAL_SYSCALL_CALL (exit_group, 127);                          \
++    }                                                                   \
++  while (0)
++#endif  /* !SHARED */
diff --git a/SOURCES/glibc-upstream-2.34-251.patch b/SOURCES/glibc-upstream-2.34-251.patch
new file mode 100644
index 0000000..9f5a590
--- /dev/null
+++ b/SOURCES/glibc-upstream-2.34-251.patch
@@ -0,0 +1,150 @@
+commit 43d77ef9b87533221890423e491eed1b8ca81f0c
+Author: Florian Weimer <fweimer@redhat.com>
+Date:   Mon May 16 18:41:43 2022 +0200
+
+    Linux: Introduce __brk_call for invoking the brk system call
+
+    Alpha and sparc can now use the generic implementation.
+
+    Reviewed-by: Adhemerval Zanella <adhemerval.zanella@linaro.org>
+    (cherry picked from commit b57ab258c1140bc45464b4b9908713e3e0ee35aa)
+
+diff --git a/sysdeps/unix/sysv/linux/alpha/brk_call.h b/sysdeps/unix/sysv/linux/alpha/brk_call.h
+new file mode 100644
+index 0000000000000000..b8088cf13f938c88
+--- /dev/null
++++ b/sysdeps/unix/sysv/linux/alpha/brk_call.h
+@@ -0,0 +1,28 @@
++/* Invoke the brk system call.  Alpha version.
++   Copyright (C) 2022 Free Software Foundation, Inc.
++   This file is part of the GNU C Library.
++
++   The GNU C Library is free software; you can redistribute it and/or
++   modify it under the terms of the GNU Lesser General Public
++   License as published by the Free Software Foundation; either
++   version 2.1 of the License, or (at your option) any later version.
++
++   The GNU C Library is distributed in the hope that it will be useful,
++   but WITHOUT ANY WARRANTY; without even the implied warranty of
++   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++   Lesser General Public License for more details.
++
++   You should have received a copy of the GNU Lesser General Public
++   License along with the GNU C Library.  If not, see
++   <https://www.gnu.org/licenses/>.  */
++
++static inline void *
++__brk_call (void *addr)
++{
++  unsigned long int result = INTERNAL_SYSCALL_CALL (brk, addr);
++  if (result == -ENOMEM)
++    /* Mimic the default error reporting behavior.  */
++    return addr;
++  else
++    return (void *) result;
++}
+diff --git a/sysdeps/unix/sysv/linux/brk.c b/sysdeps/unix/sysv/linux/brk.c
+index 2d70d824fc72d32d..20b11c15caae148d 100644
+--- a/sysdeps/unix/sysv/linux/brk.c
++++ b/sysdeps/unix/sysv/linux/brk.c
+@@ -19,6 +19,7 @@
+ #include <errno.h>
+ #include <unistd.h>
+ #include <sysdep.h>
++#include <brk_call.h>
+ 
+ /* This must be initialized data because commons can't have aliases.  
*/ + void *__curbrk = 0; +@@ -33,7 +34,7 @@ weak_alias (__curbrk, ___brk_addr) + int + __brk (void *addr) + { +- __curbrk = (void *) INTERNAL_SYSCALL_CALL (brk, addr); ++ __curbrk = __brk_call (addr); + if (__curbrk < addr) + { + __set_errno (ENOMEM); +diff --git a/sysdeps/unix/sysv/linux/brk_call.h b/sysdeps/unix/sysv/linux/brk_call.h +new file mode 100644 +index 0000000000000000..72370c25d785a9ab +--- /dev/null ++++ b/sysdeps/unix/sysv/linux/brk_call.h +@@ -0,0 +1,25 @@ ++/* Invoke the brk system call. Generic Linux version. ++ Copyright (C) 2022 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ . */ ++ ++static inline void * ++__brk_call (void *addr) ++{ ++ /* The default implementation reports errors through an unchanged ++ break. */ ++ return (void *) INTERNAL_SYSCALL_CALL (brk, addr); ++} +diff --git a/sysdeps/unix/sysv/linux/alpha/brk.c b/sysdeps/unix/sysv/linux/sparc/brk_call.h +similarity index 61% +rename from sysdeps/unix/sysv/linux/alpha/brk.c +rename to sysdeps/unix/sysv/linux/sparc/brk_call.h +index 074c47e054bfeb11..59ce5216601143fb 100644 +--- a/sysdeps/unix/sysv/linux/alpha/brk.c ++++ b/sysdeps/unix/sysv/linux/sparc/brk_call.h +@@ -1,5 +1,5 @@ +-/* Change data segment size. Linux/Alpha. +- Copyright (C) 2020-2021 Free Software Foundation, Inc. ++/* Invoke the brk system call. Sparc version. ++ Copyright (C) 2022 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or +@@ -16,23 +16,20 @@ + License along with the GNU C Library. If not, see + . */ + +-#include +-#include +-#include ++#ifdef __arch64__ ++# define SYSCALL_NUM "0x6d" ++#else ++# define SYSCALL_NUM "0x10" ++#endif + +-void *__curbrk = 0; +- +-int +-__brk (void *addr) ++static inline void * ++__brk_call (void *addr) + { +- /* Alpha brk returns -ENOMEM in case of failure. */ +- __curbrk = (void *) INTERNAL_SYSCALL_CALL (brk, addr); +- if ((unsigned long) __curbrk == -ENOMEM) +- { +- __set_errno (ENOMEM); +- return -1; +- } +- +- return 0; ++ register long int g1 asm ("g1") = __NR_brk; ++ register long int o0 asm ("o0") = (long int) addr; ++ asm volatile ("ta " SYSCALL_NUM ++ : "=r"(o0) ++ : "r"(g1), "0"(o0) ++ : "cc"); ++ return (void *) o0; + } +-weak_alias (__brk, brk) diff --git a/SOURCES/glibc-upstream-2.34-252.patch b/SOURCES/glibc-upstream-2.34-252.patch new file mode 100644 index 0000000..b607fcc --- /dev/null +++ b/SOURCES/glibc-upstream-2.34-252.patch @@ -0,0 +1,510 @@ +commit ede8d94d154157d269b18f3601440ac576c1f96a +Author: Florian Weimer +Date: Mon May 16 18:41:43 2022 +0200 + + csu: Implement and use _dl_early_allocate during static startup + + This implements mmap fallback for a brk failure during TLS + allocation. + + scripts/tls-elf-edit.py is updated to support the new patching method. 
+ The script no longer requires that in the input object is of ET_DYN + type. + + Reviewed-by: Adhemerval Zanella + (cherry picked from commit f787e138aa0bf677bf74fa2a08595c446292f3d7) + +Conflicts: + elf/Makefile + (missing ld.so static execve backport upstream) + sysdeps/generic/ldsodefs.h + (missing ld.so dependency sorting optimization upstream) + +diff --git a/csu/libc-tls.c b/csu/libc-tls.c +index d83e69f6257ae981..738f59f46b62c31c 100644 +--- a/csu/libc-tls.c ++++ b/csu/libc-tls.c +@@ -145,11 +145,16 @@ __libc_setup_tls (void) + _dl_allocate_tls_storage (in elf/dl-tls.c) does using __libc_memalign + and dl_tls_static_align. */ + tcb_offset = roundup (memsz + GLRO(dl_tls_static_surplus), max_align); +- tlsblock = __sbrk (tcb_offset + TLS_INIT_TCB_SIZE + max_align); ++ tlsblock = _dl_early_allocate (tcb_offset + TLS_INIT_TCB_SIZE + max_align); ++ if (tlsblock == NULL) ++ _startup_fatal ("Fatal glibc error: Cannot allocate TLS block\n"); + #elif TLS_DTV_AT_TP + tcb_offset = roundup (TLS_INIT_TCB_SIZE, align ?: 1); +- tlsblock = __sbrk (tcb_offset + memsz + max_align +- + TLS_PRE_TCB_SIZE + GLRO(dl_tls_static_surplus)); ++ tlsblock = _dl_early_allocate (tcb_offset + memsz + max_align ++ + TLS_PRE_TCB_SIZE ++ + GLRO(dl_tls_static_surplus)); ++ if (tlsblock == NULL) ++ _startup_fatal ("Fatal glibc error: Cannot allocate TLS block\n"); + tlsblock += TLS_PRE_TCB_SIZE; + #else + /* In case a model with a different layout for the TCB and DTV +diff --git a/elf/Makefile b/elf/Makefile +index 6423ebbdd7708a14..ea1512549be3f628 100644 +--- a/elf/Makefile ++++ b/elf/Makefile +@@ -33,6 +33,7 @@ routines = \ + $(all-dl-routines) \ + dl-addr \ + dl-addr-obj \ ++ dl-early_allocate \ + dl-error \ + dl-iteratephdr \ + dl-libc \ +@@ -104,6 +105,7 @@ all-dl-routines = $(dl-routines) $(sysdep-dl-routines) + # But they are absent from the shared libc, because that code is in ld.so. + elide-routines.os = \ + $(all-dl-routines) \ ++ dl-early_allocate \ + dl-exception \ + dl-origin \ + dl-reloc-static-pie \ +@@ -264,6 +266,7 @@ tests-static-normal := \ + tst-linkall-static \ + tst-single_threaded-pthread-static \ + tst-single_threaded-static \ ++ tst-tls-allocation-failure-static \ + tst-tlsalign-extern-static \ + tst-tlsalign-static \ + # tests-static-normal +@@ -1101,6 +1104,10 @@ $(objpfx)tst-glibcelf.out: tst-glibcelf.py elf.h $(..)/scripts/glibcelf.py \ + --cc="$(CC) $(patsubst -DMODULE_NAME=%,-DMODULE_NAME=testsuite,$(CPPFLAGS))" \ + < /dev/null > $@ 2>&1; $(evaluate-test) + ++ifeq ($(run-built-tests),yes) ++tests-special += $(objpfx)tst-tls-allocation-failure-static-patched.out ++endif ++ + # The test requires shared _and_ PIE because the executable + # unit test driver must be able to link with the shared object + # that is going to eventually go into an installed DSO. +@@ -2637,3 +2644,15 @@ $(objpfx)tst-ro-dynamic-mod.so: $(objpfx)tst-ro-dynamic-mod.os \ + $(objpfx)tst-ro-dynamic-mod.os + + $(objpfx)tst-rtld-run-static.out: $(objpfx)/ldconfig ++ ++$(objpfx)tst-tls-allocation-failure-static-patched: \ ++ $(objpfx)tst-tls-allocation-failure-static $(..)scripts/tst-elf-edit.py ++ cp $< $@ ++ $(PYTHON) $(..)scripts/tst-elf-edit.py --maximize-tls-size $@ ++ ++$(objpfx)tst-tls-allocation-failure-static-patched.out: \ ++ $(objpfx)tst-tls-allocation-failure-static-patched ++ $< > $@ 2>&1; echo "status: $$?" 
>> $@ ++ grep -q '^Fatal glibc error: Cannot allocate TLS block$$' $@ \ ++ && grep -q '^status: 127$$' $@; \ ++ $(evaluate-test) +diff --git a/elf/dl-early_allocate.c b/elf/dl-early_allocate.c +new file mode 100644 +index 0000000000000000..61677aaa0364c209 +--- /dev/null ++++ b/elf/dl-early_allocate.c +@@ -0,0 +1,30 @@ ++/* Early memory allocation for the dynamic loader. Generic version. ++ Copyright (C) 2022 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++#include ++#include ++ ++void * ++_dl_early_allocate (size_t size) ++{ ++ void *result = __sbrk (size); ++ if (result == (void *) -1) ++ result = NULL; ++ return result; ++} +diff --git a/elf/tst-tls-allocation-failure-static.c b/elf/tst-tls-allocation-failure-static.c +new file mode 100644 +index 0000000000000000..8de831b2469ba390 +--- /dev/null ++++ b/elf/tst-tls-allocation-failure-static.c +@@ -0,0 +1,31 @@ ++/* Base for test program with impossiblyh large PT_TLS segment. ++ Copyright (C) 2022 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++/* The test actual binary is patched using scripts/tst-elf-edit.py ++ --maximize-tls-size, and this introduces the expected test ++ allocation failure due to an excessive PT_LS p_memsz value. ++ ++ Patching the binary is required because on some 64-bit targets, TLS ++ relocations can only cover a 32-bit range, and glibc-internal TLS ++ variables such as errno end up outside that range. */ ++ ++int ++main (void) ++{ ++ return 0; ++} +diff --git a/scripts/tst-elf-edit.py b/scripts/tst-elf-edit.py +new file mode 100644 +index 0000000000000000..0e19ce1e7392f3ca +--- /dev/null ++++ b/scripts/tst-elf-edit.py +@@ -0,0 +1,226 @@ ++#!/usr/bin/python3 ++# ELF editor for load align tests. ++# Copyright (C) 2022 Free Software Foundation, Inc. ++# Copyright The GNU Toolchain Authors. ++# This file is part of the GNU C Library. ++# ++# The GNU C Library is free software; you can redistribute it and/or ++# modify it under the terms of the GNU Lesser General Public ++# License as published by the Free Software Foundation; either ++# version 2.1 of the License, or (at your option) any later version. 
++# ++# The GNU C Library is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++# Lesser General Public License for more details. ++# ++# You should have received a copy of the GNU Lesser General Public ++# License along with the GNU C Library; if not, see ++# . ++ ++import argparse ++import os ++import sys ++import struct ++ ++EI_NIDENT=16 ++ ++EI_MAG0=0 ++ELFMAG0=b'\x7f' ++EI_MAG1=1 ++ELFMAG1=b'E' ++EI_MAG2=2 ++ELFMAG2=b'L' ++EI_MAG3=3 ++ELFMAG3=b'F' ++ ++EI_CLASS=4 ++ELFCLASSNONE=b'0' ++ELFCLASS32=b'\x01' ++ELFCLASS64=b'\x02' ++ ++EI_DATA=5 ++ELFDATA2LSB=b'\x01' ++ELFDATA2MSB=b'\x02' ++ ++ET_EXEC=2 ++ET_DYN=3 ++ ++PT_LOAD=1 ++PT_TLS=7 ++ ++def elf_types_fmts(e_ident): ++ endian = '<' if e_ident[EI_DATA] == ELFDATA2LSB else '>' ++ addr = 'I' if e_ident[EI_CLASS] == ELFCLASS32 else 'Q' ++ off = 'I' if e_ident[EI_CLASS] == ELFCLASS32 else 'Q' ++ return (endian, addr, off) ++ ++class Elf_Ehdr: ++ def __init__(self, e_ident): ++ endian, addr, off = elf_types_fmts(e_ident) ++ self.fmt = '{0}HHI{1}{2}{2}IHHHHHH'.format(endian, addr, off) ++ self.len = struct.calcsize(self.fmt) ++ ++ def read(self, f): ++ buf = f.read(self.len) ++ if not buf: ++ error('{}: header too small'.format(f.name)) ++ data = struct.unpack(self.fmt, buf) ++ self.e_type = data[0] ++ self.e_machine = data[1] ++ self.e_version = data[2] ++ self.e_entry = data[3] ++ self.e_phoff = data[4] ++ self.e_shoff = data[5] ++ self.e_flags = data[6] ++ self.e_ehsize = data[7] ++ self.e_phentsize= data[8] ++ self.e_phnum = data[9] ++ self.e_shstrndx = data[10] ++ ++ ++class Elf_Phdr: ++ def __init__(self, e_ident): ++ endian, addr, off = elf_types_fmts(e_ident) ++ self.ei_class = e_ident[EI_CLASS] ++ if self.ei_class == ELFCLASS32: ++ self.fmt = '{0}I{2}{1}{1}IIII'.format(endian, addr, off) ++ else: ++ self.fmt = '{0}II{2}{1}{1}QQQ'.format(endian, addr, off) ++ self.len = struct.calcsize(self.fmt) ++ ++ def read(self, f): ++ buf = f.read(self.len) ++ if len(buf) < self.len: ++ error('{}: program header too small'.format(f.name)) ++ data = struct.unpack(self.fmt, buf) ++ if self.ei_class == ELFCLASS32: ++ self.p_type = data[0] ++ self.p_offset = data[1] ++ self.p_vaddr = data[2] ++ self.p_paddr = data[3] ++ self.p_filesz = data[4] ++ self.p_memsz = data[5] ++ self.p_flags = data[6] ++ self.p_align = data[7] ++ else: ++ self.p_type = data[0] ++ self.p_flags = data[1] ++ self.p_offset = data[2] ++ self.p_vaddr = data[3] ++ self.p_paddr = data[4] ++ self.p_filesz = data[5] ++ self.p_memsz = data[6] ++ self.p_align = data[7] ++ ++ def write(self, f): ++ if self.ei_class == ELFCLASS32: ++ data = struct.pack(self.fmt, ++ self.p_type, ++ self.p_offset, ++ self.p_vaddr, ++ self.p_paddr, ++ self.p_filesz, ++ self.p_memsz, ++ self.p_flags, ++ self.p_align) ++ else: ++ data = struct.pack(self.fmt, ++ self.p_type, ++ self.p_flags, ++ self.p_offset, ++ self.p_vaddr, ++ self.p_paddr, ++ self.p_filesz, ++ self.p_memsz, ++ self.p_align) ++ f.write(data) ++ ++ ++def error(msg): ++ print(msg, file=sys.stderr) ++ sys.exit(1) ++ ++ ++def elf_edit_align(phdr, align): ++ if align == 'half': ++ phdr.p_align = phdr.p_align >> 1 ++ else: ++ phdr.p_align = int(align) ++ ++def elf_edit_maximize_tls_size(phdr, elfclass): ++ if elfclass == ELFCLASS32: ++ # It is possible that the kernel can allocate half of the ++ # address space, so use something larger. 
++ phdr.p_memsz = 0xfff00000 ++ else: ++ phdr.p_memsz = 1 << 63 ++ ++def elf_edit(f, opts): ++ ei_nident_fmt = 'c' * EI_NIDENT ++ ei_nident_len = struct.calcsize(ei_nident_fmt) ++ ++ data = f.read(ei_nident_len) ++ if len(data) < ei_nident_len: ++ error('{}: e_nident too small'.format(f.name)) ++ e_ident = struct.unpack(ei_nident_fmt, data) ++ ++ if e_ident[EI_MAG0] != ELFMAG0 \ ++ or e_ident[EI_MAG1] != ELFMAG1 \ ++ or e_ident[EI_MAG2] != ELFMAG2 \ ++ or e_ident[EI_MAG3] != ELFMAG3: ++ error('{}: bad ELF header'.format(f.name)) ++ ++ if e_ident[EI_CLASS] != ELFCLASS32 \ ++ and e_ident[EI_CLASS] != ELFCLASS64: ++ error('{}: unsupported ELF class: {}'.format(f.name, e_ident[EI_CLASS])) ++ ++ if e_ident[EI_DATA] != ELFDATA2LSB \ ++ and e_ident[EI_DATA] != ELFDATA2MSB: \ ++ error('{}: unsupported ELF data: {}'.format(f.name, e_ident[EI_DATA])) ++ ++ ehdr = Elf_Ehdr(e_ident) ++ ehdr.read(f) ++ if ehdr.e_type not in (ET_EXEC, ET_DYN): ++ error('{}: not an executable or shared library'.format(f.name)) ++ ++ phdr = Elf_Phdr(e_ident) ++ maximize_tls_size_done = False ++ for i in range(0, ehdr.e_phnum): ++ f.seek(ehdr.e_phoff + i * phdr.len) ++ phdr.read(f) ++ if phdr.p_type == PT_LOAD and opts.align is not None: ++ elf_edit_align(phdr, opts.align) ++ f.seek(ehdr.e_phoff + i * phdr.len) ++ phdr.write(f) ++ break ++ if phdr.p_type == PT_TLS and opts.maximize_tls_size: ++ elf_edit_maximize_tls_size(phdr, e_ident[EI_CLASS]) ++ f.seek(ehdr.e_phoff + i * phdr.len) ++ phdr.write(f) ++ maximize_tls_size_done = True ++ break ++ ++ if opts.maximize_tls_size and not maximize_tls_size_done: ++ error('{}: TLS maximum size was not updated'.format(f.name)) ++ ++def get_parser(): ++ parser = argparse.ArgumentParser(description=__doc__) ++ parser.add_argument('-a', dest='align', ++ help='How to set the LOAD alignment') ++ parser.add_argument('--maximize-tls-size', action='store_true', ++ help='Set maximum PT_TLS size') ++ parser.add_argument('output', ++ help='ELF file to edit') ++ return parser ++ ++ ++def main(argv): ++ parser = get_parser() ++ opts = parser.parse_args(argv) ++ with open(opts.output, 'r+b') as fout: ++ elf_edit(fout, opts) ++ ++ ++if __name__ == '__main__': ++ main(sys.argv[1:]) +diff --git a/sysdeps/generic/ldsodefs.h b/sysdeps/generic/ldsodefs.h +index a38de94bf7ea8e93..87ad2f3f4d89eb7d 100644 +--- a/sysdeps/generic/ldsodefs.h ++++ b/sysdeps/generic/ldsodefs.h +@@ -1238,6 +1238,11 @@ extern struct link_map * _dl_get_dl_main_map (void) + /* Initialize the DSO sort algorithm to use. */ + extern void _dl_sort_maps_init (void) attribute_hidden; + ++/* Perform early memory allocation, avoding a TCB dependency. ++ Terminate the process if allocation fails. May attempt to use ++ brk. */ ++void *_dl_early_allocate (size_t size) attribute_hidden; ++ + /* Initialization of libpthread for statically linked applications. + If libpthread is not linked in, this is an empty function. */ + void __pthread_initialize_minimal (void) weak_function; +diff --git a/sysdeps/unix/sysv/linux/dl-early_allocate.c b/sysdeps/unix/sysv/linux/dl-early_allocate.c +new file mode 100644 +index 0000000000000000..52c538e85afa8522 +--- /dev/null ++++ b/sysdeps/unix/sysv/linux/dl-early_allocate.c +@@ -0,0 +1,82 @@ ++/* Early memory allocation for the dynamic loader. Generic version. ++ Copyright (C) 2022 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. 
++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++/* Mark symbols hidden in static PIE for early self relocation to work. */ ++#if BUILD_PIE_DEFAULT ++# pragma GCC visibility push(hidden) ++#endif ++#include ++ ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++ ++/* Defined in brk.c. */ ++extern void *__curbrk; ++ ++void * ++_dl_early_allocate (size_t size) ++{ ++ void *result; ++ ++ if (__curbrk != NULL) ++ /* If the break has been initialized, brk must have run before, ++ so just call it once more. */ ++ { ++ result = __sbrk (size); ++ if (result == (void *) -1) ++ result = NULL; ++ } ++ else ++ { ++ /* If brk has not been invoked, there is no need to update ++ __curbrk. The first call to brk will take care of that. */ ++ void *previous = __brk_call (0); ++ result = __brk_call (previous + size); ++ if (result == previous) ++ result = NULL; ++ else ++ result = previous; ++ } ++ ++ /* If brk fails, fall back to mmap. This can happen due to ++ unfortunate ASLR layout decisions and kernel bugs, particularly ++ for static PIE. */ ++ if (result == NULL) ++ { ++ long int ret; ++ int prot = PROT_READ | PROT_WRITE; ++ int flags = MAP_PRIVATE | MAP_ANONYMOUS; ++#ifdef __NR_mmap2 ++ ret = MMAP_CALL_INTERNAL (mmap2, 0, size, prot, flags, -1, 0); ++#else ++ ret = MMAP_CALL_INTERNAL (mmap, 0, size, prot, flags, -1, 0); ++#endif ++ if (INTERNAL_SYSCALL_ERROR_P (ret)) ++ result = NULL; ++ else ++ result = (void *) ret; ++ } ++ ++ return result; ++} diff --git a/SOURCES/glibc-upstream-2.34-253.patch b/SOURCES/glibc-upstream-2.34-253.patch new file mode 100644 index 0000000..2be9efc --- /dev/null +++ b/SOURCES/glibc-upstream-2.34-253.patch @@ -0,0 +1,350 @@ +commit 89b638f48ac5c9af5b1fe9caa6287d70127b66a5 +Author: Stefan Liebler +Date: Tue May 17 16:12:18 2022 +0200 + + S390: Enable static PIE + + This commit enables static PIE on 64bit. On 31bit, static PIE is + not supported. + + A new configure check in sysdeps/s390/s390-64/configure.ac also performs + a minimal test for requirements in ld: + Ensure you also have those patches for: + - binutils (ld) + - "[PR ld/22263] s390: Avoid dynamic TLS relocs in PIE" + https://sourceware.org/git/?p=binutils-gdb.git;a=commit;h=26b1426577b5dcb32d149c64cca3e603b81948a9 + (Tested by configure check above) + Otherwise there will be a R_390_TLS_TPOFF relocation, which fails to + be processed in _dl_relocate_static_pie() as static TLS map is not setup. + - "s390: Add DT_JMPREL pointing to .rela.[i]plt with static-pie" + https://sourceware.org/git/?p=binutils-gdb.git;a=commit;h=d942d8db12adf4c9e5c7d9ed6496a779ece7149e + (We can't test it in configure as we are not able to link a static PIE + executable if the system glibc lacks static PIE support) + Otherwise there won't be DT_JMPREL, DT_PLTRELA, DT_PLTRELASZ entries + and the IFUNC symbols are not processed, which leads to crashes. 
+ + - kernel (the mentioned links to the commits belong to 5.19 merge window): + - "s390/mmap: increase stack/mmap gap to 128MB" + https://git.kernel.org/pub/scm/linux/kernel/git/s390/linux.git/commit/?h=features&id=f2f47d0ef72c30622e62471903ea19446ea79ee2 + - "s390/vdso: move vdso mapping to its own function" + https://git.kernel.org/pub/scm/linux/kernel/git/s390/linux.git/commit/?h=features&id=57761da4dc5cd60bed2c81ba0edb7495c3c740b8 + - "s390/vdso: map vdso above stack" + https://git.kernel.org/pub/scm/linux/kernel/git/s390/linux.git/commit/?h=features&id=9e37a2e8546f9e48ea76c839116fa5174d14e033 + - "s390/vdso: add vdso randomization" + https://git.kernel.org/pub/scm/linux/kernel/git/s390/linux.git/commit/?h=features&id=41cd81abafdc4e58a93fcb677712a76885e3ca25 + (We can't test the kernel of the target system) + Otherwise if /proc/sys/kernel/randomize_va_space is turned off (0), + static PIE executables like ldconfig will crash. While startup sbrk is + used to enlarge the HEAP. Unfortunately the underlying brk syscall fails + as there is not enough space after the HEAP. Then the address of the TLS + image is invalid and the following memcpy in __libc_setup_tls() leads + to a segfault. + If /proc/sys/kernel/randomize_va_space is activated (default: 2), there + is enough space after HEAP. + + - glibc + - "Linux: Define MMAP_CALL_INTERNAL" + https://sourceware.org/git/?p=glibc.git;a=commit;h=c1b68685d438373efe64e5f076f4215723004dfb + - "i386: Remove OPTIMIZE_FOR_GCC_5 from Linux libc-do-syscall.S" + https://sourceware.org/git/?p=glibc.git;a=commit;h=6e5c7a1e262961adb52443ab91bd2c9b72316402 + - "i386: Honor I386_USE_SYSENTER for 6-argument Linux system calls" + https://sourceware.org/git/?p=glibc.git;a=commit;h=60f0f2130d30cfd008ca39743027f1e200592dff + - "ia64: Always define IA64_USE_NEW_STUB as a flag macro" + https://sourceware.org/git/?p=glibc.git;a=commit;h=18bd9c3d3b1b6a9182698c85354578d1d58e9d64 + - "Linux: Implement a useful version of _startup_fatal" + https://sourceware.org/git/?p=glibc.git;a=commit;h=a2a6bce7d7e52c1c34369a7da62c501cc350bc31 + - "Linux: Introduce __brk_call for invoking the brk system call" + https://sourceware.org/git/?p=glibc.git;a=commit;h=b57ab258c1140bc45464b4b9908713e3e0ee35aa + - "csu: Implement and use _dl_early_allocate during static startup" + https://sourceware.org/git/?p=glibc.git;a=commit;h=f787e138aa0bf677bf74fa2a08595c446292f3d7 + The mentioned patch series by Florian Weimer avoids the mentioned failing + sbrk syscall by falling back to mmap. + + This commit also adjusts startup code in start.S to be ready for static PIE. + We have to add a wrapper function for main as we are not allowed to use + GOT relocations before __libc_start_main is called. + (Compare also to: + - commit 14d886edbd3d80b771e1c42fbd9217f9074de9c6 + "aarch64: fix start code for static pie" + - commit 3d1d79283e6de4f7c434cb67fb53a4fd28359669 + "aarch64: fix static pie enabled libc when main is in a shared library" + ) + + (cherry picked from commit 728894dba4a19578bd803906de184a8dd51ed13c) + +diff --git a/sysdeps/s390/s390-64/configure b/sysdeps/s390/s390-64/configure +new file mode 100644 +index 0000000000000000..101c570d2e62da25 +--- /dev/null ++++ b/sysdeps/s390/s390-64/configure +@@ -0,0 +1,122 @@ ++# This file is generated from configure.ac by Autoconf. DO NOT EDIT! ++ # Local configure fragment for sysdeps/s390/s390-64. ++ ++# Minimal checking for static PIE support in ld. 
++# Compare to ld testcase/bugzilla: ++# /ld/testsuite/ld-elf/pr22263-1.rd ++{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for s390-specific static PIE requirements" >&5 ++$as_echo_n "checking for s390-specific static PIE requirements... " >&6; } ++if { as_var=\ ++libc_cv_s390x_staticpie_req; eval \${$as_var+:} false; }; then : ++ $as_echo_n "(cached) " >&6 ++else ++ cat > conftest1.c < conftest2.c <&5 ++ (eval $ac_try) 2>&5 ++ ac_status=$? ++ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 ++ test $ac_status = 0; }; } \ ++ && { ac_try='${CC-cc} $CFLAGS $CPPFLAGS $LDFLAGS -fPIE -c conftest2.c -o conftest2.o' ++ { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5 ++ (eval $ac_try) 2>&5 ++ ac_status=$? ++ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 ++ test $ac_status = 0; }; } \ ++ && { ac_try='${CC-cc} $CFLAGS $CPPFLAGS $LDFLAGS -pie -o conftest conftest1.o conftest2.o' ++ { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5 ++ (eval $ac_try) 2>&5 ++ ac_status=$? ++ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 ++ test $ac_status = 0; }; } \ ++ && { ac_try='! readelf -Wr conftest | grep R_390_TLS_TPOFF' ++ { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5 ++ (eval $ac_try) 2>&5 ++ ac_status=$? ++ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 ++ test $ac_status = 0; }; } ++ then ++ libc_cv_s390x_staticpie_req=yes ++ fi ++ rm -rf conftest.* ++fi ++eval ac_res=\$\ ++libc_cv_s390x_staticpie_req ++ { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 ++$as_echo "$ac_res" >&6; } ++if test $libc_cv_s390x_staticpie_req = yes; then ++ # Static PIE is supported only on 64bit. ++ # Ensure you also have those patches for: ++ # - binutils (ld) ++ # - "[PR ld/22263] s390: Avoid dynamic TLS relocs in PIE" ++ # https://sourceware.org/git/?p=binutils-gdb.git;a=commit;h=26b1426577b5dcb32d149c64cca3e603b81948a9 ++ # (Tested by configure check above) ++ # Otherwise there will be a R_390_TLS_TPOFF relocation, which fails to ++ # be processed in _dl_relocate_static_pie() as static TLS map is not setup. ++ # - "s390: Add DT_JMPREL pointing to .rela.[i]plt with static-pie" ++ # https://sourceware.org/git/?p=binutils-gdb.git;a=commit;h=d942d8db12adf4c9e5c7d9ed6496a779ece7149e ++ # (We can't test it in configure as we are not able to link a static PIE ++ # executable if the system glibc lacks static PIE support) ++ # Otherwise there won't be DT_JMPREL, DT_PLTRELA, DT_PLTRELASZ entries ++ # and the IFUNC symbols are not processed, which leads to crashes. 
++ # ++ # - kernel (the mentioned links to the commits belong to 5.19 merge window): ++ # - "s390/mmap: increase stack/mmap gap to 128MB" ++ # https://git.kernel.org/pub/scm/linux/kernel/git/s390/linux.git/commit/?h=features&id=f2f47d0ef72c30622e62471903ea19446ea79ee2 ++ # - "s390/vdso: move vdso mapping to its own function" ++ # https://git.kernel.org/pub/scm/linux/kernel/git/s390/linux.git/commit/?h=features&id=57761da4dc5cd60bed2c81ba0edb7495c3c740b8 ++ # - "s390/vdso: map vdso above stack" ++ # https://git.kernel.org/pub/scm/linux/kernel/git/s390/linux.git/commit/?h=features&id=9e37a2e8546f9e48ea76c839116fa5174d14e033 ++ # - "s390/vdso: add vdso randomization" ++ # https://git.kernel.org/pub/scm/linux/kernel/git/s390/linux.git/commit/?h=features&id=41cd81abafdc4e58a93fcb677712a76885e3ca25 ++ # (We can't test the kernel of the target system) ++ # Otherwise if /proc/sys/kernel/randomize_va_space is turned off (0), ++ # static PIE executables like ldconfig will crash. While startup sbrk is ++ # used to enlarge the HEAP. Unfortunately the underlying brk syscall fails ++ # as there is not enough space after the HEAP. Then the address of the TLS ++ # image is invalid and the following memcpy in __libc_setup_tls() leads ++ # to a segfault. ++ # If /proc/sys/kernel/randomize_va_space is activated (default: 2), there ++ # is enough space after HEAP. ++ # ++ # - glibc ++ # - "Linux: Define MMAP_CALL_INTERNAL" ++ # https://sourceware.org/git/?p=glibc.git;a=commit;h=c1b68685d438373efe64e5f076f4215723004dfb ++ # - "i386: Remove OPTIMIZE_FOR_GCC_5 from Linux libc-do-syscall.S" ++ # https://sourceware.org/git/?p=glibc.git;a=commit;h=6e5c7a1e262961adb52443ab91bd2c9b72316402 ++ # - "i386: Honor I386_USE_SYSENTER for 6-argument Linux system calls" ++ # https://sourceware.org/git/?p=glibc.git;a=commit;h=60f0f2130d30cfd008ca39743027f1e200592dff ++ # - "ia64: Always define IA64_USE_NEW_STUB as a flag macro" ++ # https://sourceware.org/git/?p=glibc.git;a=commit;h=18bd9c3d3b1b6a9182698c85354578d1d58e9d64 ++ # - "Linux: Implement a useful version of _startup_fatal" ++ # https://sourceware.org/git/?p=glibc.git;a=commit;h=a2a6bce7d7e52c1c34369a7da62c501cc350bc31 ++ # - "Linux: Introduce __brk_call for invoking the brk system call" ++ # https://sourceware.org/git/?p=glibc.git;a=commit;h=b57ab258c1140bc45464b4b9908713e3e0ee35aa ++ # - "csu: Implement and use _dl_early_allocate during static startup" ++ # https://sourceware.org/git/?p=glibc.git;a=commit;h=f787e138aa0bf677bf74fa2a08595c446292f3d7 ++ # The mentioned patch series by Florian Weimer avoids the mentioned failing ++ # sbrk syscall by falling back to mmap. ++ $as_echo "#define SUPPORT_STATIC_PIE 1" >>confdefs.h ++ ++fi +diff --git a/sysdeps/s390/s390-64/configure.ac b/sysdeps/s390/s390-64/configure.ac +new file mode 100644 +index 0000000000000000..2583a4a3350ac11f +--- /dev/null ++++ b/sysdeps/s390/s390-64/configure.ac +@@ -0,0 +1,92 @@ ++GLIBC_PROVIDES dnl See aclocal.m4 in the top level source directory. ++# Local configure fragment for sysdeps/s390/s390-64. ++ ++# Minimal checking for static PIE support in ld. ++# Compare to ld testcase/bugzilla: ++# /ld/testsuite/ld-elf/pr22263-1.rd ++AC_CACHE_CHECK([for s390-specific static PIE requirements], \ ++[libc_cv_s390x_staticpie_req], [dnl ++ cat > conftest1.c < conftest2.c < Scrt1.o */ + larl %r2,main@GOTENT # load pointer to main + lg %r2,0(%r2) ++# else ++ /* Used for dynamic linked position dependent executable. 
++ => crt1.o (glibc configured without --disable-default-pie:
++ PIC is defined)
++ Or for static linked position independent executable.
++ => rcrt1.o (only available if glibc configured without
++ --disable-default-pie: PIC is defined) */
++ larl %r2,__wrap_main
++# endif
+ brasl %r14,__libc_start_main@plt
+ #else
++ /* Used for dynamic/static linked position dependent executable.
++ => crt1.o (glibc configured with --disable-default-pie:
++ PIC and SHARED are not defined) */
+ larl %r2,main # load pointer to main
+ brasl %r14,__libc_start_main
+ #endif
+@@ -98,6 +113,19 @@ _start:
+
+ cfi_endproc
+
++#if defined PIC && !defined SHARED
++ /* When main is not defined in the executable but in a shared library
++ then a wrapper is needed in crt1.o of the static-pie enabled libc,
++ because crt1.o and rcrt1.o share code and the latter must avoid the
++ use of GOT relocations before __libc_start_main is called. */
++__wrap_main:
++ cfi_startproc
++ larl %r1,main@GOTENT # load pointer to main
++ lg %r1,0(%r1)
++ br %r1
++ cfi_endproc
++#endif
++
+ /* Define a symbol for the first piece of initialized data. */
+ .data
+ .globl __data_start
diff --git a/SOURCES/glibc-upstream-2.34-254.patch b/SOURCES/glibc-upstream-2.34-254.patch
new file mode 100644
index 0000000..be69b2f
--- /dev/null
+++ b/SOURCES/glibc-upstream-2.34-254.patch
@@ -0,0 +1,301 @@
+commit c73c79af7d6f1124fbfa5d935b4f620217d6a2ec
+Author: Szabolcs Nagy
+Date: Fri Jun 15 16:14:58 2018 +0100
+
+ rtld: Use generic argv adjustment in ld.so [BZ #23293]
+
+ When an executable is invoked as
+
+ ./ld.so [ld.so-args] ./exe [exe-args]
+
+ then the argv is adjusted in ld.so before calling the entry point of
+ the executable so ld.so args are not visible to it. On most targets
+ this requires moving argv, env and auxv on the stack to ensure correct
+ stack alignment at the entry point. This had several issues:
+
+ - The code for this adjustment on the stack is written in asm as part
+ of the target specific ld.so _start code which is hard to maintain.
+
+ - The adjustment is done after _dl_start returns, where it's too late
+ to update GLRO(dl_auxv), as it is already readonly, so it points to
+ memory that was clobbered by the adjustment. This is bug 23293.
+
+ - _environ is also wrong in ld.so after the adjustment, but it is
+ likely not used after _dl_start returns so this is not user visible.
+
+ - _dl_argv was updated, but for this it was moved out of relro, which
+ changes security properties across targets unnecessarily.
+
+ This patch introduces a generic _dl_start_args_adjust function that
+ handles the argument adjustments after ld.so processed its own args
+ and before relro protection is applied.
+
+ The same algorithm is used on all targets, _dl_skip_args is now 0, so
+ existing target specific adjustment code is no longer used. The bug
+ affects aarch64, alpha, arc, arm, csky, ia64, nios2, s390-32 and sparc,
+ other targets don't need the change in principle, only for consistency.
+
+ The GNU Hurd start code relied on _dl_skip_args after dl_main returned,
+ now it checks directly if args were adjusted and fixes the Hurd startup
+ data accordingly.
+
+ Follow up patches can remove _dl_skip_args and DL_ARGV_NOT_RELRO.
+
+ Tested on aarch64-linux-gnu and cross tested on i686-gnu.
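For orientation, the adjustment this patch introduces operates on the System V ABI initial process stack, where argc, argv, envp and the auxiliary vector sit contiguously above the stack pointer at entry. A minimal editorial C sketch of that layout, assuming the usual ABI conventions; the function name is illustrative and not taken from the patch:

    /* Sketch only: the block that the generic adjustment code shuffles.
       SP points at argc on process entry.  */
    void
    walk_initial_stack (void **sp)
    {
      long argc = (long) sp[0];          /* argc occupies the first slot.  */
      char **argv = (char **) (sp + 1);  /* argv[0..argc-1], NULL-terminated.  */
      char **envp = argv + argc + 1;     /* envp starts after the argv NULL.  */
      char **p = envp;
      while (*p != NULL)                 /* skip environment entries...  */
        p++;
      /* ...the auxv (type/value pairs ending with AT_NULL) starts at p + 1.
         Dropping the ld.so arguments thus means sliding argv, envp and auxv
         down by that many slots and decrementing argc, which is what
         _dl_start_args_adjust in the diff below does once for all targets.  */
    }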
+ + Reviewed-by: Adhemerval Zanella + (cherry picked from commit ad43cac44a6860eaefcadadfb2acb349921e96bf) + +Conflicts: + elf/rtld.c + (Downstream-only backport of glibc-rh2023422-1.patch) + +diff --git a/elf/rtld.c b/elf/rtld.c +index 434fbeddd5cce74d..9de53ccaed420a57 100644 +--- a/elf/rtld.c ++++ b/elf/rtld.c +@@ -1121,6 +1121,62 @@ rtld_chain_load (struct link_map *main_map, char *argv0) + rtld_soname, pathname, errcode); + } + ++/* Adjusts the contents of the stack and related globals for the user ++ entry point. The ld.so processed skip_args arguments and bumped ++ _dl_argv and _dl_argc accordingly. Those arguments are removed from ++ argv here. */ ++static void ++_dl_start_args_adjust (int skip_args) ++{ ++ void **sp = (void **) (_dl_argv - skip_args - 1); ++ void **p = sp + skip_args; ++ ++ if (skip_args == 0) ++ return; ++ ++ /* Sanity check. */ ++ intptr_t argc = (intptr_t) sp[0] - skip_args; ++ assert (argc == _dl_argc); ++ ++ /* Adjust argc on stack. */ ++ sp[0] = (void *) (intptr_t) _dl_argc; ++ ++ /* Update globals in rtld. */ ++ _dl_argv -= skip_args; ++ _environ -= skip_args; ++ ++ /* Shuffle argv down. */ ++ do ++ *++sp = *++p; ++ while (*p != NULL); ++ ++ assert (_environ == (char **) (sp + 1)); ++ ++ /* Shuffle envp down. */ ++ do ++ *++sp = *++p; ++ while (*p != NULL); ++ ++#ifdef HAVE_AUX_VECTOR ++ void **auxv = (void **) GLRO(dl_auxv) - skip_args; ++ GLRO(dl_auxv) = (ElfW(auxv_t) *) auxv; /* Aliasing violation. */ ++ assert (auxv == sp + 1); ++ ++ /* Shuffle auxv down. */ ++ ElfW(auxv_t) ax; ++ char *oldp = (char *) (p + 1); ++ char *newp = (char *) (sp + 1); ++ do ++ { ++ memcpy (&ax, oldp, sizeof (ax)); ++ memcpy (newp, &ax, sizeof (ax)); ++ oldp += sizeof (ax); ++ newp += sizeof (ax); ++ } ++ while (ax.a_type != AT_NULL); ++#endif ++} ++ + static void + dl_main (const ElfW(Phdr) *phdr, + ElfW(Word) phnum, +@@ -1177,6 +1233,7 @@ dl_main (const ElfW(Phdr) *phdr, + rtld_is_main = true; + + char *argv0 = NULL; ++ char **orig_argv = _dl_argv; + + /* Note the place where the dynamic linker actually came from. */ + GL(dl_rtld_map).l_name = rtld_progname; +@@ -1191,7 +1248,6 @@ dl_main (const ElfW(Phdr) *phdr, + GLRO(dl_lazy) = -1; + } + +- ++_dl_skip_args; + --_dl_argc; + ++_dl_argv; + } +@@ -1200,14 +1256,12 @@ dl_main (const ElfW(Phdr) *phdr, + if (state.mode != rtld_mode_help) + state.mode = rtld_mode_verify; + +- ++_dl_skip_args; + --_dl_argc; + ++_dl_argv; + } + else if (! strcmp (_dl_argv[1], "--inhibit-cache")) + { + GLRO(dl_inhibit_cache) = 1; +- ++_dl_skip_args; + --_dl_argc; + ++_dl_argv; + } +@@ -1217,7 +1271,6 @@ dl_main (const ElfW(Phdr) *phdr, + state.library_path = _dl_argv[2]; + state.library_path_source = "--library-path"; + +- _dl_skip_args += 2; + _dl_argc -= 2; + _dl_argv += 2; + } +@@ -1226,7 +1279,6 @@ dl_main (const ElfW(Phdr) *phdr, + { + GLRO(dl_inhibit_rpath) = _dl_argv[2]; + +- _dl_skip_args += 2; + _dl_argc -= 2; + _dl_argv += 2; + } +@@ -1234,14 +1286,12 @@ dl_main (const ElfW(Phdr) *phdr, + { + audit_list_add_string (&state.audit_list, _dl_argv[2]); + +- _dl_skip_args += 2; + _dl_argc -= 2; + _dl_argv += 2; + } + else if (! 
strcmp (_dl_argv[1], "--preload") && _dl_argc > 2) + { + state.preloadarg = _dl_argv[2]; +- _dl_skip_args += 2; + _dl_argc -= 2; + _dl_argv += 2; + } +@@ -1249,7 +1299,6 @@ dl_main (const ElfW(Phdr) *phdr, + { + argv0 = _dl_argv[2]; + +- _dl_skip_args += 2; + _dl_argc -= 2; + _dl_argv += 2; + } +@@ -1257,7 +1306,6 @@ dl_main (const ElfW(Phdr) *phdr, + && _dl_argc > 2) + { + state.glibc_hwcaps_prepend = _dl_argv[2]; +- _dl_skip_args += 2; + _dl_argc -= 2; + _dl_argv += 2; + } +@@ -1265,7 +1313,6 @@ dl_main (const ElfW(Phdr) *phdr, + && _dl_argc > 2) + { + state.glibc_hwcaps_mask = _dl_argv[2]; +- _dl_skip_args += 2; + _dl_argc -= 2; + _dl_argv += 2; + } +@@ -1274,7 +1321,6 @@ dl_main (const ElfW(Phdr) *phdr, + { + state.mode = rtld_mode_list_tunables; + +- ++_dl_skip_args; + --_dl_argc; + ++_dl_argv; + } +@@ -1283,7 +1329,6 @@ dl_main (const ElfW(Phdr) *phdr, + { + state.mode = rtld_mode_list_diagnostics; + +- ++_dl_skip_args; + --_dl_argc; + ++_dl_argv; + } +@@ -1329,7 +1374,6 @@ dl_main (const ElfW(Phdr) *phdr, + _dl_usage (ld_so_name, NULL); + } + +- ++_dl_skip_args; + --_dl_argc; + ++_dl_argv; + +@@ -1428,6 +1472,9 @@ dl_main (const ElfW(Phdr) *phdr, + /* Set the argv[0] string now that we've processed the executable. */ + if (argv0 != NULL) + _dl_argv[0] = argv0; ++ ++ /* Adjust arguments for the application entry point. */ ++ _dl_start_args_adjust (_dl_argv - orig_argv); + } + else + { +diff --git a/sysdeps/mach/hurd/dl-sysdep.c b/sysdeps/mach/hurd/dl-sysdep.c +index 4b2072e5d5e3bfd2..5c0f8e46bfbd4753 100644 +--- a/sysdeps/mach/hurd/dl-sysdep.c ++++ b/sysdeps/mach/hurd/dl-sysdep.c +@@ -106,6 +106,7 @@ _dl_sysdep_start (void **start_argptr, + { + void go (intptr_t *argdata) + { ++ char *orig_argv0; + char **p; + + /* Cache the information in various global variables. */ +@@ -114,6 +115,8 @@ _dl_sysdep_start (void **start_argptr, + _environ = &_dl_argv[_dl_argc + 1]; + for (p = _environ; *p++;); /* Skip environ pointers and terminator. */ + ++ orig_argv0 = _dl_argv[0]; ++ + if ((void *) p == _dl_argv[0]) + { + static struct hurd_startup_data nodata; +@@ -204,30 +207,23 @@ unfmh(); /* XXX */ + + /* The call above might screw a few things up. + +- First of all, if _dl_skip_args is nonzero, we are ignoring +- the first few arguments. However, if we have no Hurd startup +- data, it is the magical convention that ARGV[0] == P. The ++ P is the location after the terminating NULL of the list of ++ environment variables. It has to point to the Hurd startup ++ data or if that's missing then P == ARGV[0] must hold. The + startup code in init-first.c will get confused if this is not + the case, so we must rearrange things to make it so. We'll +- overwrite the origional ARGV[0] at P with ARGV[_dl_skip_args]. ++ recompute P and move the Hurd data or the new ARGV[0] there. + +- Secondly, if we need to be secure, it removes some dangerous +- environment variables. If we have no Hurd startup date this +- changes P (since that's the location after the terminating +- NULL in the list of environment variables). We do the same +- thing as in the first case but make sure we recalculate P. +- If we do have Hurd startup data, we have to move the data +- such that it starts just after the terminating NULL in the +- environment list. ++ Note: directly invoked ld.so can move arguments and env vars. + + We use memmove, since the locations might overlap. 
*/
+- if (__libc_enable_secure || _dl_skip_args)
+- {
+- char **newp;
+
+- for (newp = _environ; *newp++;);
++ char **newp;
++ for (newp = _environ; *newp++;);
+
+- if (_dl_argv[-_dl_skip_args] == (char *) p)
++ if (newp != p || _dl_argv[0] != orig_argv0)
++ {
++ if (orig_argv0 == (char *) p)
+ {
+ if ((char *) newp != _dl_argv[0])
+ {
diff --git a/SOURCES/glibc-upstream-2.34-255.patch b/SOURCES/glibc-upstream-2.34-255.patch
new file mode 100644
index 0000000..aa679f3
--- /dev/null
+++ b/SOURCES/glibc-upstream-2.34-255.patch
@@ -0,0 +1,105 @@
+commit b2585cae2854d7d2868fb2e51e2796042c5e0679
+Author: Szabolcs Nagy
+Date: Tue May 3 13:18:04 2022 +0100
+
+ linux: Add a getauxval test [BZ #23293]
+
+ This is for bug 23293 and it relies on the glibc test system running
+ tests via explicit ld.so invocation by default.
+
+ Reviewed-by: Florian Weimer
+ Reviewed-by: Adhemerval Zanella
+ (cherry picked from commit 9faf5262c77487c96da8a3e961b88c0b1879e186)
+
+diff --git a/sysdeps/unix/sysv/linux/Makefile b/sysdeps/unix/sysv/linux/Makefile
+index 0657f4003e7116c6..5c772f69d1b1f1f1 100644
+--- a/sysdeps/unix/sysv/linux/Makefile
++++ b/sysdeps/unix/sysv/linux/Makefile
+@@ -123,6 +123,7 @@ tests += tst-clone tst-clone2 tst-clone3 tst-fanotify tst-personality \
+ tst-close_range \
+ tst-prctl \
+ tst-scm_rights \
++ tst-getauxval \
+ # tests
+
+ # Test for the symbol version of fcntl that was replaced in glibc 2.28.
+diff --git a/sysdeps/unix/sysv/linux/tst-getauxval.c b/sysdeps/unix/sysv/linux/tst-getauxval.c
+new file mode 100644
+index 0000000000000000..c4b619574369f4c5
+--- /dev/null
++++ b/sysdeps/unix/sysv/linux/tst-getauxval.c
+@@ -0,0 +1,74 @@
++/* Basic test for getauxval.
++ Copyright (C) 2022 Free Software Foundation, Inc.
++ This file is part of the GNU C Library.
++
++ The GNU C Library is free software; you can redistribute it and/or
++ modify it under the terms of the GNU Lesser General Public
++ License as published by the Free Software Foundation; either
++ version 2.1 of the License, or (at your option) any later version.
++
++ The GNU C Library is distributed in the hope that it will be useful,
++ but WITHOUT ANY WARRANTY; without even the implied warranty of
++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++ Lesser General Public License for more details.
++
++ You should have received a copy of the GNU Lesser General Public
++ License along with the GNU C Library; if not, see
++ <https://www.gnu.org/licenses/>. */
++
++#include <stdio.h>
++#include <unistd.h>
++#include <sys/auxv.h>
++#include <support/check.h>
++
++static int missing;
++static int mismatch;
++
++static void
++check_nonzero (unsigned long t, const char *s)
++{
++ unsigned long v = getauxval (t);
++ printf ("%s: %lu (0x%lx)\n", s, v, v);
++ if (v == 0)
++ missing++;
++}
++
++static void
++check_eq (unsigned long t, const char *s, unsigned long want)
++{
++ unsigned long v = getauxval (t);
++ printf ("%s: %lu want: %lu\n", s, v, want);
++ if (v != want)
++ mismatch++;
++}
++
++#define NZ(x) check_nonzero (x, #x)
++#define EQ(x, want) check_eq (x, #x, want)
++
++static int
++do_test (void)
++{
++ /* These auxv entries should be non-zero on Linux. */
++ NZ (AT_PHDR);
++ NZ (AT_PHENT);
++ NZ (AT_PHNUM);
++ NZ (AT_PAGESZ);
++ NZ (AT_ENTRY);
++ NZ (AT_CLKTCK);
++ NZ (AT_RANDOM);
++ NZ (AT_EXECFN);
++ if (missing)
++ FAIL_EXIT1 ("Found %d missing auxv entries.\n", missing);
++
++ /* Check against syscalls.
*/
++ EQ (AT_UID, getuid ());
++ EQ (AT_EUID, geteuid ());
++ EQ (AT_GID, getgid ());
++ EQ (AT_EGID, getegid ());
++ if (mismatch)
++ FAIL_EXIT1 ("Found %d mismatching auxv entries.\n", mismatch);
++
++ return 0;
++}
++
++#include <support/test-driver.c>
diff --git a/SOURCES/glibc-upstream-2.34-256.patch b/SOURCES/glibc-upstream-2.34-256.patch
new file mode 100644
index 0000000..d92a5d0
--- /dev/null
+++ b/SOURCES/glibc-upstream-2.34-256.patch
@@ -0,0 +1,39 @@
+commit 14770f3e0462721b317f138197e1fbf4db542c94
+Author: Sergei Trofimovich
+Date: Mon May 23 13:56:43 2022 +0530
+
+ string.h: fix __fortified_attr_access macro call [BZ #29162]
+
+ commit e938c0274 "Don't add access size hints to fortifiable functions"
+ converted a few '__attr_access ((...))' into '__fortified_attr_access (...)'
+ calls.
+
+ But one of the conversions had double parentheses of '__fortified_attr_access (...)'.
+
+ Noticed as a gnat6 build failure:
+
+ /<>-glibc-2.34-210-dev/include/bits/string_fortified.h:110:50: error: macro "__fortified_attr_access" requires 3 arguments, but only 1 given
+
+ The change fixes the parentheses.
+
+ This is seen when using compilers that do not support
+ __builtin___stpncpy_chk, e.g. gcc older than 4.7, clang older than 2.6
+ or some compiler not derived from gcc or clang.
+
+ Signed-off-by: Sergei Trofimovich
+ Reviewed-by: Siddhesh Poyarekar
+ (cherry picked from commit 5a5f94af0542f9a35aaa7992c18eb4e2403a29b9)
+
+diff --git a/string/bits/string_fortified.h b/string/bits/string_fortified.h
+index 218006c9ba882d9c..4e66e0bd1ebb572a 100644
+--- a/string/bits/string_fortified.h
++++ b/string/bits/string_fortified.h
+@@ -107,7 +107,7 @@ __NTH (stpncpy (char *__dest, const char *__src, size_t __n))
+ # else
+ extern char *__stpncpy_chk (char *__dest, const char *__src, size_t __n,
+ size_t __destlen) __THROW
+- __fortified_attr_access ((__write_only__, 1, 3))
++ __fortified_attr_access (__write_only__, 1, 3)
+ __attr_access ((__read_only__, 2));
+ extern char *__REDIRECT_NTH (__stpncpy_alias, (char *__dest, const char *__src,
+ size_t __n), stpncpy);
diff --git a/SOURCES/glibc-upstream-2.34-257.patch b/SOURCES/glibc-upstream-2.34-257.patch
new file mode 100644
index 0000000..c9e1cd5
--- /dev/null
+++ b/SOURCES/glibc-upstream-2.34-257.patch
@@ -0,0 +1,51 @@
+commit 83ae8287c1c3009459ff29241b647ff61363b22c
+Author: Noah Goldstein
+Date: Tue Feb 15 08:18:15 2022 -0600
+
+ x86: Fallback {str|wcs}cmp RTM in the ncmp overflow case [BZ #29127]
+
+ Re-cherry-pick commit c627209832 for strcmp-avx2.S change which was
+ omitted in initial cherry pick because at the time this bug was not
+ present on release branch.
+
+ Fixes BZ #29127.
+
+ In the overflow fallback strncmp-avx2-rtm and wcsncmp-avx2-rtm would
+ call strcmp-avx2 and wcscmp-avx2 respectively. This would have
+ no checks around vzeroupper and would trigger spurious
+ aborts. This commit fixes that.
+
+ test-strcmp, test-strncmp, test-wcscmp, and test-wcsncmp all pass on
+ AVX2 machines with and without RTM.
+
+ Co-authored-by: H.J. Lu
+ (cherry picked from commit c6272098323153db373f2986c67786ea8c85f1cf)
+
+diff --git a/sysdeps/x86_64/multiarch/strcmp-avx2.S b/sysdeps/x86_64/multiarch/strcmp-avx2.S
+index aa91f6e48a0e1ce5..a9806daadbbfd18b 100644
+--- a/sysdeps/x86_64/multiarch/strcmp-avx2.S
++++ b/sysdeps/x86_64/multiarch/strcmp-avx2.S
+@@ -345,10 +345,10 @@ L(one_or_less):
+ movq %LOCALE_REG, %rdx
+ # endif
+ jb L(ret_zero)
+-# ifdef USE_AS_WCSCMP
+ /* 'nbe' covers the case where length is negative (large
+ unsigned).
*/
+- jnbe __wcscmp_avx2
++ jnbe OVERFLOW_STRCMP
++# ifdef USE_AS_WCSCMP
+ movl (%rdi), %edx
+ xorl %eax, %eax
+ cmpl (%rsi), %edx
+@@ -357,10 +357,6 @@ L(one_or_less):
+ negl %eax
+ orl $1, %eax
+ # else
+- /* 'nbe' covers the case where length is negative (large
+- unsigned). */
+-
+- jnbe __strcmp_avx2
+ movzbl (%rdi), %eax
+ movzbl (%rsi), %ecx
+ TOLOWER_gpr (%rax, %eax)
diff --git a/SOURCES/glibc-upstream-2.34-258.patch b/SOURCES/glibc-upstream-2.34-258.patch
new file mode 100644
index 0000000..1f04c21
--- /dev/null
+++ b/SOURCES/glibc-upstream-2.34-258.patch
@@ -0,0 +1,737 @@
+commit ff450cdbdee0b8cb6b9d653d6d2fa892de29be31
+Author: Arjun Shankar
+Date: Tue May 24 17:57:36 2022 +0200
+
+ Fix deadlock when pthread_atfork handler calls pthread_atfork or dlclose
+
+ In multi-threaded programs, registering via pthread_atfork,
+ de-registering implicitly via dlclose, or running pthread_atfork
+ handlers during fork was protected by an internal lock. This meant
+ that a pthread_atfork handler attempting to register another handler or
+ dlclose a dynamically loaded library would lead to a deadlock.
+
+ This commit fixes the deadlock in the following way:
+
+ During the execution of handlers at fork time, the atfork lock is
+ released prior to the execution of each handler and taken again upon its
+ return. Any handler registrations or de-registrations that occurred
+ during the execution of the handler are accounted for before proceeding
+ with further handler execution.
+
+ If a handler that hasn't been executed yet gets de-registered by another
+ handler during fork, it will not be executed. If a handler gets
+ registered by another handler during fork, it will not be executed
+ during that particular fork.
+
+ The possibility that handlers may now be registered or deregistered
+ during handler execution means that identifying the next handler to be
+ run, after a given handler may have registered or de-registered others,
+ requires some bookkeeping. The fork_handler struct has an additional field, 'id',
+ which is assigned sequentially during registration. Thus, handlers are
+ executed in ascending order of 'id' during 'prepare', and descending
+ order of 'id' during parent/child handler execution after the fork.
+
+ Two tests are included:
+
+ * tst-atfork3: Adhemerval Zanella
+ This test exercises calling dlclose from prepare, parent, and child
+ handlers.
+
+ * tst-atfork4: This test exercises calling pthread_atfork and dlclose
+ from the prepare handler.
+
+ [BZ #24595, BZ #27054]
+
+ Co-authored-by: Adhemerval Zanella
+ Reviewed-by: Adhemerval Zanella
+ (cherry picked from commit 52a103e237329b9f88a28513fe7506ffc3bd8ced)
+
+diff --git a/include/register-atfork.h b/include/register-atfork.h
+index fadde14700947ac6..6d7bfd87688d6530 100644
+--- a/include/register-atfork.h
++++ b/include/register-atfork.h
+@@ -26,6 +26,7 @@ struct fork_handler
+ void (*parent_handler) (void);
+ void (*child_handler) (void);
+ void *dso_handle;
++ uint64_t id;
+ };
+
+ /* Function to call to unregister fork handlers. */
+@@ -39,19 +40,18 @@ enum __run_fork_handler_type
+ atfork_run_parent
+ };
+
+-/* Run the atfork handlers and lock/unlock the internal lock depending
+- of the WHO argument:
+-
+- - atfork_run_prepare: run all the PREPARE_HANDLER in reverse order of
+- insertion and locks the internal lock.
+- - atfork_run_child: run all the CHILD_HANDLER and unlocks the internal
+- lock.
+- - atfork_run_parent: run all the PARENT_HANDLER and unlocks the internal
+- lock.
+-
+- Perform locking only if DO_LOCKING.
*/
+-extern void __run_fork_handlers (enum __run_fork_handler_type who,
+- _Bool do_locking) attribute_hidden;
++/* Run the atfork prepare handlers in the reverse order of registration and
++ return the ID of the last registered handler. If DO_LOCKING is true, the
++ internal lock is held locked upon return. */
++extern uint64_t __run_prefork_handlers (_Bool do_locking) attribute_hidden;
++
++/* Given a handler type (parent or child), run all the atfork handlers in
++ the order of registration up to and including the handler with id equal
++ to LASTRUN. If DO_LOCKING is true, the internal lock is unlocked prior
++ to return. */
++extern void __run_postfork_handlers (enum __run_fork_handler_type who,
++ _Bool do_locking,
++ uint64_t lastrun) attribute_hidden;
+
+ /* C library side function to register new fork handlers. */
+ extern int __register_atfork (void (*__prepare) (void),
+diff --git a/posix/fork.c b/posix/fork.c
+index 021691b9b7441f15..890b806eb48cb75a 100644
+--- a/posix/fork.c
++++ b/posix/fork.c
+@@ -46,8 +46,9 @@ __libc_fork (void)
+ best effort to make is async-signal-safe at least for single-thread
+ case. */
+ bool multiple_threads = __libc_single_threaded == 0;
++ uint64_t lastrun;
+
+- __run_fork_handlers (atfork_run_prepare, multiple_threads);
++ lastrun = __run_prefork_handlers (multiple_threads);
+
+ struct nss_database_data nss_database_data;
+
+@@ -105,7 +106,7 @@ __libc_fork (void)
+ reclaim_stacks ();
+
+ /* Run the handlers registered for the child. */
+- __run_fork_handlers (atfork_run_child, multiple_threads);
++ __run_postfork_handlers (atfork_run_child, multiple_threads, lastrun);
+ }
+ else
+ {
+@@ -123,7 +124,7 @@ __libc_fork (void)
+ }
+
+ /* Run the handlers registered for the parent. */
+- __run_fork_handlers (atfork_run_parent, multiple_threads);
++ __run_postfork_handlers (atfork_run_parent, multiple_threads, lastrun);
+
+ if (pid < 0)
+ __set_errno (save_errno);
+diff --git a/posix/register-atfork.c b/posix/register-atfork.c
+index 6fd9e4c56aafd7cc..6370437aa68e039e 100644
+--- a/posix/register-atfork.c
++++ b/posix/register-atfork.c
+@@ -19,6 +19,8 @@
+ #include <libc-lock.h>
+ #include <stdbool.h>
+ #include <register-atfork.h>
++#include <intprops.h>
++#include <stdio.h>
+
+ #define DYNARRAY_ELEMENT struct fork_handler
+ #define DYNARRAY_STRUCT fork_handler_list
+@@ -27,7 +29,7 @@
+ #include <malloc/dynarray-skeleton.c>
+
+ static struct fork_handler_list fork_handlers;
+-static bool fork_handler_init = false;
++static uint64_t fork_handler_counter;
+
+ static int atfork_lock = LLL_LOCK_INITIALIZER;
+
+@@ -37,11 +39,8 @@ __register_atfork (void (*prepare) (void), void (*parent) (void),
+ {
+ lll_lock (atfork_lock, LLL_PRIVATE);
+
+- if (!fork_handler_init)
+- {
+- fork_handler_list_init (&fork_handlers);
+- fork_handler_init = true;
+- }
++ if (fork_handler_counter == 0)
++ fork_handler_list_init (&fork_handlers);
+
+ struct fork_handler *newp = fork_handler_list_emplace (&fork_handlers);
+ if (newp != NULL)
+@@ -50,6 +49,13 @@ __register_atfork (void (*prepare) (void), void (*parent) (void),
+ newp->parent_handler = parent;
+ newp->child_handler = child;
+ newp->dso_handle = dso_handle;
++
++ /* IDs assigned to handlers start at 1 and increment with handler
++ registration. Un-registering a handler discards the corresponding
++ ID. It is not reused in future registrations. */
++ if (INT_ADD_OVERFLOW (fork_handler_counter, 1))
++ __libc_fatal ("fork handler counter overflow");
++ newp->id = ++fork_handler_counter;
+ }
+
+ /* Release the lock.
*/
+@@ -104,37 +110,111 @@ __unregister_atfork (void *dso_handle)
+ lll_unlock (atfork_lock, LLL_PRIVATE);
+ }
+
+-void
+-__run_fork_handlers (enum __run_fork_handler_type who, _Bool do_locking)
++uint64_t
++__run_prefork_handlers (_Bool do_locking)
+ {
+- struct fork_handler *runp;
++ uint64_t lastrun;
+
+- if (who == atfork_run_prepare)
++ if (do_locking)
++ lll_lock (atfork_lock, LLL_PRIVATE);
++
++ /* We run prepare handlers from last to first. After fork, only
++ handlers up to the last handler found here (pre-fork) will be run.
++ Handlers registered during __run_prefork_handlers or
++ __run_postfork_handlers will be positioned after this last handler, and
++ since their prepare handlers won't be run now, their parent/child
++ handlers should also be ignored. */
++ lastrun = fork_handler_counter;
++
++ size_t sl = fork_handler_list_size (&fork_handlers);
++ for (size_t i = sl; i > 0;)
+ {
+- if (do_locking)
+- lll_lock (atfork_lock, LLL_PRIVATE);
+- size_t sl = fork_handler_list_size (&fork_handlers);
+- for (size_t i = sl; i > 0; i--)
+- {
+- runp = fork_handler_list_at (&fork_handlers, i - 1);
+- if (runp->prepare_handler != NULL)
+- runp->prepare_handler ();
+- }
++ struct fork_handler *runp
++ = fork_handler_list_at (&fork_handlers, i - 1);
++
++ uint64_t id = runp->id;
++
++ if (runp->prepare_handler != NULL)
++ {
++ if (do_locking)
++ lll_unlock (atfork_lock, LLL_PRIVATE);
++
++ runp->prepare_handler ();
++
++ if (do_locking)
++ lll_lock (atfork_lock, LLL_PRIVATE);
++ }
++
++ /* We unlocked, ran the handler, and locked again. In the
++ meanwhile, one or more deregistrations could have occurred leading
++ to the current (just run) handler being moved up the list or even
++ removed from the list itself. Since handler IDs are guaranteed
++ to be in increasing order, the next handler has to have: */
++
++ /* A. An earlier position than the current one has. */
++ i--;
++
++ /* B. A lower ID than the current one does. The code below skips
++ any newly added handlers with higher IDs. */
++ while (i > 0
++ && fork_handler_list_at (&fork_handlers, i - 1)->id >= id)
++ i--;
+ }
+- else
++
++ return lastrun;
++}
++
++void
++__run_postfork_handlers (enum __run_fork_handler_type who, _Bool do_locking,
++ uint64_t lastrun)
++{
++ size_t sl = fork_handler_list_size (&fork_handlers);
++ for (size_t i = 0; i < sl;)
+ {
+- size_t sl = fork_handler_list_size (&fork_handlers);
+- for (size_t i = 0; i < sl; i++)
+- {
+- runp = fork_handler_list_at (&fork_handlers, i);
+- if (who == atfork_run_child && runp->child_handler)
+- runp->child_handler ();
+- else if (who == atfork_run_parent && runp->parent_handler)
+- runp->parent_handler ();
+- }
++ struct fork_handler *runp = fork_handler_list_at (&fork_handlers, i);
++ uint64_t id = runp->id;
++
++ /* prepare handlers were not run for handlers with ID > LASTRUN.
++ Thus, parent/child handlers will also not be run. */
++ if (id > lastrun)
++ break;
++
+ if (do_locking)
+- lll_unlock (atfork_lock, LLL_PRIVATE);
++ lll_unlock (atfork_lock, LLL_PRIVATE);
++
++ if (who == atfork_run_child && runp->child_handler)
++ runp->child_handler ();
++ else if (who == atfork_run_parent && runp->parent_handler)
++ runp->parent_handler ();
++
++ if (do_locking)
++ lll_lock (atfork_lock, LLL_PRIVATE);
++
++ /* We unlocked, ran the handler, and locked again. In the meanwhile,
++ one or more [de]registrations could have occurred. Due to this,
++ the list size must be updated.
*/ ++ sl = fork_handler_list_size (&fork_handlers); ++ ++ /* The just-run handler could also have moved up the list. */ ++ ++ if (sl > i && fork_handler_list_at (&fork_handlers, i)->id == id) ++ /* The position of the recently run handler hasn't changed. The ++ next handler to be run is an easy increment away. */ ++ i++; ++ else ++ { ++ /* The next handler to be run is the first handler in the list ++ to have an ID higher than the current one. */ ++ for (i = 0; i < sl; i++) ++ { ++ if (fork_handler_list_at (&fork_handlers, i)->id > id) ++ break; ++ } ++ } + } ++ ++ if (do_locking) ++ lll_unlock (atfork_lock, LLL_PRIVATE); + } + + +diff --git a/sysdeps/pthread/Makefile b/sysdeps/pthread/Makefile +index 00419c4d199df912..5147588c130c9415 100644 +--- a/sysdeps/pthread/Makefile ++++ b/sysdeps/pthread/Makefile +@@ -154,16 +154,36 @@ tests += tst-cancelx2 tst-cancelx3 tst-cancelx6 tst-cancelx8 tst-cancelx9 \ + tst-cleanupx0 tst-cleanupx1 tst-cleanupx2 tst-cleanupx3 + + ifeq ($(build-shared),yes) +-tests += tst-atfork2 tst-pt-tls4 tst-_res1 tst-fini1 tst-create1 ++tests += \ ++ tst-atfork2 \ ++ tst-pt-tls4 \ ++ tst-_res1 \ ++ tst-fini1 \ ++ tst-create1 \ ++ tst-atfork3 \ ++ tst-atfork4 \ ++# tests ++ + tests-nolibpthread += tst-fini1 + endif + +-modules-names += tst-atfork2mod tst-tls4moda tst-tls4modb \ +- tst-_res1mod1 tst-_res1mod2 tst-fini1mod \ +- tst-create1mod ++modules-names += \ ++ tst-atfork2mod \ ++ tst-tls4moda \ ++ tst-tls4modb \ ++ tst-_res1mod1 \ ++ tst-_res1mod2 \ ++ tst-fini1mod \ ++ tst-create1mod \ ++ tst-atfork3mod \ ++ tst-atfork4mod \ ++# module-names ++ + test-modules = $(addprefix $(objpfx),$(addsuffix .so,$(modules-names))) + + tst-atfork2mod.so-no-z-defs = yes ++tst-atfork3mod.so-no-z-defs = yes ++tst-atfork4mod.so-no-z-defs = yes + tst-create1mod.so-no-z-defs = yes + + ifeq ($(build-shared),yes) +@@ -226,8 +246,18 @@ tst-atfork2-ENV = MALLOC_TRACE=$(objpfx)tst-atfork2.mtrace \ + LD_PRELOAD=$(common-objpfx)/malloc/libc_malloc_debug.so + $(objpfx)tst-atfork2mod.so: $(shared-thread-library) + ++$(objpfx)tst-atfork3: $(shared-thread-library) ++LDFLAGS-tst-atfork3 = -rdynamic ++$(objpfx)tst-atfork3mod.so: $(shared-thread-library) ++ ++$(objpfx)tst-atfork4: $(shared-thread-library) ++LDFLAGS-tst-atfork4 = -rdynamic ++$(objpfx)tst-atfork4mod.so: $(shared-thread-library) ++ + ifeq ($(build-shared),yes) + $(objpfx)tst-atfork2.out: $(objpfx)tst-atfork2mod.so ++$(objpfx)tst-atfork3.out: $(objpfx)tst-atfork3mod.so ++$(objpfx)tst-atfork4.out: $(objpfx)tst-atfork4mod.so + endif + + ifeq ($(build-shared),yes) +diff --git a/sysdeps/pthread/tst-atfork3.c b/sysdeps/pthread/tst-atfork3.c +new file mode 100644 +index 0000000000000000..bb2250e432ab79ad +--- /dev/null ++++ b/sysdeps/pthread/tst-atfork3.c +@@ -0,0 +1,118 @@ ++/* Check if pthread_atfork handler can call dlclose (BZ#24595). ++ Copyright (C) 2022 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. 
++
++ You should have received a copy of the GNU Lesser General Public
++ License along with the GNU C Library; if not, see
++ <https://www.gnu.org/licenses/>. */
++
++#include <dlfcn.h>
++#include <pthread.h>
++#include <stdbool.h>
++#include <stdio.h>
++
++#include <support/capture_subprocess.h>
++#include <support/check.h>
++#include <support/xdlfcn.h>
++#include <support/xthread.h>
++
++/* Check if pthread_atfork handlers do not deadlock when calling a function
++ that might alter the internal fork handler list, such as dlclose.
++
++ The test registers a callback set with pthread_atfork(), dlopens a shared
++ library (nptl/tst-atfork3mod.c), calls an exported symbol from the library
++ (which in turn also registers atfork handlers), and calls fork to trigger
++ the callbacks. */
++
++static void *handler;
++static bool run_dlclose_prepare;
++static bool run_dlclose_parent;
++static bool run_dlclose_child;
++
++static void
++prepare (void)
++{
++ if (run_dlclose_prepare)
++ xdlclose (handler);
++}
++
++static void
++parent (void)
++{
++ if (run_dlclose_parent)
++ xdlclose (handler);
++}
++
++static void
++child (void)
++{
++ if (run_dlclose_child)
++ xdlclose (handler);
++}
++
++static void
++proc_func (void *closure)
++{
++}
++
++static void
++do_test_generic (bool dlclose_prepare, bool dlclose_parent, bool dlclose_child)
++{
++ run_dlclose_prepare = dlclose_prepare;
++ run_dlclose_parent = dlclose_parent;
++ run_dlclose_child = dlclose_child;
++
++ handler = xdlopen ("tst-atfork3mod.so", RTLD_NOW);
++
++ int (*atfork3mod_func)(void);
++ atfork3mod_func = xdlsym (handler, "atfork3mod_func");
++
++ atfork3mod_func ();
++
++ struct support_capture_subprocess proc
++ = support_capture_subprocess (proc_func, NULL);
++ support_capture_subprocess_check (&proc, "tst-atfork3", 0, sc_allow_none);
++
++ handler = atfork3mod_func = NULL;
++
++ support_capture_subprocess_free (&proc);
++}
++
++static void *
++thread_func (void *closure)
++{
++ return NULL;
++}
++
++static int
++do_test (void)
++{
++ {
++ /* Make the process act as multithreaded. */
++ pthread_attr_t attr;
++ xpthread_attr_init (&attr);
++ xpthread_attr_setdetachstate (&attr, PTHREAD_CREATE_DETACHED);
++ xpthread_create (&attr, thread_func, NULL);
++ }
++
++ TEST_COMPARE (pthread_atfork (prepare, parent, child), 0);
++
++ do_test_generic (true /* prepare */, false /* parent */, false /* child */);
++ do_test_generic (false /* prepare */, true /* parent */, false /* child */);
++ do_test_generic (false /* prepare */, false /* parent */, true /* child */);
++
++ return 0;
++}
++
++#include <support/test-driver.c>
+diff --git a/sysdeps/pthread/tst-atfork3mod.c b/sysdeps/pthread/tst-atfork3mod.c
+new file mode 100644
+index 0000000000000000..6d0658cb9efdecbc
+--- /dev/null
++++ b/sysdeps/pthread/tst-atfork3mod.c
+@@ -0,0 +1,44 @@
++/* Copyright (C) 2022 Free Software Foundation, Inc.
++ This file is part of the GNU C Library.
++
++ The GNU C Library is free software; you can redistribute it and/or
++ modify it under the terms of the GNU Lesser General Public
++ License as published by the Free Software Foundation; either
++ version 2.1 of the License, or (at your option) any later version.
++
++ The GNU C Library is distributed in the hope that it will be useful,
++ but WITHOUT ANY WARRANTY; without even the implied warranty of
++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++ Lesser General Public License for more details.
++
++ You should have received a copy of the GNU Lesser General Public
++ License along with the GNU C Library; if not, see
++ <https://www.gnu.org/licenses/>.
*/ ++ ++#include ++#include ++#include ++ ++#include ++ ++static void ++mod_prepare (void) ++{ ++} ++ ++static void ++mod_parent (void) ++{ ++} ++ ++static void ++mod_child (void) ++{ ++} ++ ++int atfork3mod_func (void) ++{ ++ TEST_COMPARE (pthread_atfork (mod_prepare, mod_parent, mod_child), 0); ++ ++ return 0; ++} +diff --git a/sysdeps/pthread/tst-atfork4.c b/sysdeps/pthread/tst-atfork4.c +new file mode 100644 +index 0000000000000000..52dc87e73b846ab9 +--- /dev/null ++++ b/sysdeps/pthread/tst-atfork4.c +@@ -0,0 +1,128 @@ ++/* pthread_atfork supports handlers that call pthread_atfork or dlclose. ++ Copyright (C) 2022 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++static void * ++thread_func (void *x) ++{ ++ return NULL; ++} ++ ++static unsigned int second_atfork_handler_runcount = 0; ++ ++static void ++second_atfork_handler (void) ++{ ++ second_atfork_handler_runcount++; ++} ++ ++static void *h = NULL; ++ ++static unsigned int atfork_handler_runcount = 0; ++ ++static void ++prepare (void) ++{ ++ /* These atfork handlers are registered while atfork handlers are being ++ executed and thus will not be executed during the corresponding ++ fork. */ ++ TEST_VERIFY_EXIT (pthread_atfork (second_atfork_handler, ++ second_atfork_handler, ++ second_atfork_handler) == 0); ++ ++ /* This will de-register the atfork handlers registered by the dlopen'd ++ library and so they will not be executed. */ ++ if (h != NULL) ++ { ++ xdlclose (h); ++ h = NULL; ++ } ++ ++ atfork_handler_runcount++; ++} ++ ++static void ++after (void) ++{ ++ atfork_handler_runcount++; ++} ++ ++static int ++do_test (void) ++{ ++ /* Make sure __libc_single_threaded is 0. */ ++ pthread_attr_t attr; ++ xpthread_attr_init (&attr); ++ xpthread_attr_setdetachstate (&attr, PTHREAD_CREATE_DETACHED); ++ xpthread_create (&attr, thread_func, NULL); ++ ++ void (*reg_atfork_handlers) (void); ++ ++ h = xdlopen ("tst-atfork4mod.so", RTLD_LAZY); ++ ++ reg_atfork_handlers = xdlsym (h, "reg_atfork_handlers"); ++ ++ reg_atfork_handlers (); ++ ++ /* We register our atfork handlers *after* loading the module so that our ++ prepare handler is called first at fork, where we then dlclose the ++ module before its prepare handler has a chance to be called. */ ++ TEST_VERIFY_EXIT (pthread_atfork (prepare, after, after) == 0); ++ ++ pid_t pid = xfork (); ++ ++ /* Both the parent and the child processes should observe this. 
*/ ++ TEST_VERIFY_EXIT (atfork_handler_runcount == 2); ++ TEST_VERIFY_EXIT (second_atfork_handler_runcount == 0); ++ ++ if (pid > 0) ++ { ++ int childstat; ++ ++ xwaitpid (-1, &childstat, 0); ++ TEST_VERIFY_EXIT (WIFEXITED (childstat) ++ && WEXITSTATUS (childstat) == 0); ++ ++ /* This time, the second set of atfork handlers should also be called ++ since the handlers are already in place before fork is called. */ ++ ++ pid = xfork (); ++ ++ TEST_VERIFY_EXIT (atfork_handler_runcount == 4); ++ TEST_VERIFY_EXIT (second_atfork_handler_runcount == 2); ++ ++ if (pid > 0) ++ { ++ xwaitpid (-1, &childstat, 0); ++ TEST_VERIFY_EXIT (WIFEXITED (childstat) ++ && WEXITSTATUS (childstat) == 0); ++ } ++ } ++ ++ return 0; ++} ++ ++#include +diff --git a/sysdeps/pthread/tst-atfork4mod.c b/sysdeps/pthread/tst-atfork4mod.c +new file mode 100644 +index 0000000000000000..e111efeb185916e0 +--- /dev/null ++++ b/sysdeps/pthread/tst-atfork4mod.c +@@ -0,0 +1,48 @@ ++/* pthread_atfork supports handlers that call pthread_atfork or dlclose. ++ Copyright (C) 2022 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++#include ++ ++/* This dynamically loaded library simply registers its atfork handlers when ++ asked to. The atfork handlers should never be executed because the ++ library is unloaded before fork is called by the test program. */ ++ ++static void ++prepare (void) ++{ ++ abort (); ++} ++ ++static void ++parent (void) ++{ ++ abort (); ++} ++ ++static void ++child (void) ++{ ++ abort (); ++} ++ ++void ++reg_atfork_handlers (void) ++{ ++ pthread_atfork (prepare, parent, child); ++} diff --git a/SOURCES/glibc-upstream-2.34-259.patch b/SOURCES/glibc-upstream-2.34-259.patch new file mode 100644 index 0000000..d0501b8 --- /dev/null +++ b/SOURCES/glibc-upstream-2.34-259.patch @@ -0,0 +1,30 @@ +commit b349fe072275bfc5763110a49fe6ef1b44d60289 +Author: Adhemerval Zanella +Date: Tue May 31 11:46:59 2022 -0300 + + misc: Use 64 bit stat for daemon (BZ# 29203) + + This is a missing spot initially from 52a5fe70a2c77935. + + Checked on i686-linux-gnu. 
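This backport and the ones that follow it (BZ# 29203 through BZ# 29213) apply the same mechanical conversion: internal users of struct stat64 are switched to the 64-bit-time variants so that 32-bit targets built with 64-bit time_t see correct timestamps. A minimal before/after sketch of the pattern, using the glibc-internal names that appear in the diffs:

    /* Before: time fields are 32-bit on legacy 32-bit ABIs.  */
    struct stat64 st;
    if (__fstat64 (fd, &st) != 0)
      return -1;

    /* After: time fields are 64-bit everywhere; the other fields and
       the S_IS* macros are used unchanged.  */
    struct __stat64_t64 st64;
    if (__fstat64_time64 (fd, &st64) != 0)
      return -1;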
+ + (cherry picked from commit 3fbc33010c76721d34f676d8efb45bcc54e0d575) + +diff --git a/misc/daemon.c b/misc/daemon.c +index 0e688f4d7482e335..3c73ac2ab8709812 100644 +--- a/misc/daemon.c ++++ b/misc/daemon.c +@@ -61,11 +61,10 @@ daemon (int nochdir, int noclose) + (void)__chdir("/"); + + if (!noclose) { +- struct stat64 st; ++ struct __stat64_t64 st; + + if ((fd = __open_nocancel(_PATH_DEVNULL, O_RDWR, 0)) != -1 +- && (__builtin_expect (__fstat64 (fd, &st), 0) +- == 0)) { ++ && __glibc_likely (__fstat64_time64 (fd, &st) == 0)) { + if (__builtin_expect (S_ISCHR (st.st_mode), 1) != 0 + #if defined DEV_NULL_MAJOR && defined DEV_NULL_MINOR + && (st.st_rdev diff --git a/SOURCES/glibc-upstream-2.34-260.patch b/SOURCES/glibc-upstream-2.34-260.patch new file mode 100644 index 0000000..95264aa --- /dev/null +++ b/SOURCES/glibc-upstream-2.34-260.patch @@ -0,0 +1,34 @@ +commit aa8a87f51d7a1fb86ff75d3e3870316b6bc70dfe +Author: Adhemerval Zanella +Date: Tue May 31 11:51:46 2022 -0300 + + misc: Use 64 bit stat for getusershell (BZ# 29204) + + This is a missing spot initially from 52a5fe70a2c77935. + + Checked on i686-linux-gnu. + + (cherry picked from commit ec995fb2152f160f02bf695ff83c45df4a6cd868) + +diff --git a/misc/getusershell.c b/misc/getusershell.c +index 11f5aa83f888a114..4221095dca743dfa 100644 +--- a/misc/getusershell.c ++++ b/misc/getusershell.c +@@ -97,7 +97,7 @@ initshells (void) + { + char **sp, *cp; + FILE *fp; +- struct stat64 statb; ++ struct __stat64_t64 statb; + size_t flen; + + free(shells); +@@ -106,7 +106,7 @@ initshells (void) + strings = NULL; + if ((fp = fopen(_PATH_SHELLS, "rce")) == NULL) + goto init_okshells_noclose; +- if (__fstat64(fileno(fp), &statb) == -1) { ++ if (__fstat64_time64(fileno(fp), &statb) == -1) { + init_okshells: + (void)fclose(fp); + init_okshells_noclose: diff --git a/SOURCES/glibc-upstream-2.34-261.patch b/SOURCES/glibc-upstream-2.34-261.patch new file mode 100644 index 0000000..349d50d --- /dev/null +++ b/SOURCES/glibc-upstream-2.34-261.patch @@ -0,0 +1,56 @@ +commit 9db6a597ef950737d3cd7af0d4211291197b82dd +Author: Adhemerval Zanella +Date: Tue May 31 12:17:20 2022 -0300 + + posix: Use 64 bit stat for posix_fallocate fallback (BZ# 29207) + + This is a missing spot initially from 52a5fe70a2c77935. + + Checked on i686-linux-gnu. + + (cherry picked from commit 574ba60fc8a7fb35e6216e2fdecc521acab7ffd2) + +diff --git a/sysdeps/posix/posix_fallocate.c b/sysdeps/posix/posix_fallocate.c +index 0bb379c94d7cf779..4381033d6e16c2e3 100644 +--- a/sysdeps/posix/posix_fallocate.c ++++ b/sysdeps/posix/posix_fallocate.c +@@ -30,7 +30,7 @@ + int + posix_fallocate (int fd, __off_t offset, __off_t len) + { +- struct stat64 st; ++ struct __stat64_t64 st; + + if (offset < 0 || len < 0) + return EINVAL; +@@ -48,7 +48,7 @@ posix_fallocate (int fd, __off_t offset, __off_t len) + } + + /* We have to make sure that this is really a regular file. 
*/ +- if (__fstat64 (fd, &st) != 0) ++ if (__fstat64_time64 (fd, &st) != 0) + return EBADF; + if (S_ISFIFO (st.st_mode)) + return ESPIPE; +diff --git a/sysdeps/posix/posix_fallocate64.c b/sysdeps/posix/posix_fallocate64.c +index c1e233b49c8d7f37..d45b0c17489fbbbb 100644 +--- a/sysdeps/posix/posix_fallocate64.c ++++ b/sysdeps/posix/posix_fallocate64.c +@@ -30,7 +30,7 @@ + int + __posix_fallocate64_l64 (int fd, __off64_t offset, __off64_t len) + { +- struct stat64 st; ++ struct __stat64_t64 st; + + if (offset < 0 || len < 0) + return EINVAL; +@@ -48,7 +48,7 @@ __posix_fallocate64_l64 (int fd, __off64_t offset, __off64_t len) + } + + /* We have to make sure that this is really a regular file. */ +- if (__fstat64 (fd, &st) != 0) ++ if (__fstat64_time64 (fd, &st) != 0) + return EBADF; + if (S_ISFIFO (st.st_mode)) + return ESPIPE; diff --git a/SOURCES/glibc-upstream-2.34-262.patch b/SOURCES/glibc-upstream-2.34-262.patch new file mode 100644 index 0000000..013d55c --- /dev/null +++ b/SOURCES/glibc-upstream-2.34-262.patch @@ -0,0 +1,28 @@ +commit f9c3e57ac25511db78f3d51a38f6a715be220479 +Author: Adhemerval Zanella +Date: Tue May 31 12:22:13 2022 -0300 + + posix: Use 64 bit stat for fpathconf (_PC_ASYNC_IO) (BZ# 29208) + + This is a missing spot initially from 52a5fe70a2c77935. + + Checked on i686-linux-gnu. + + (cherry picked from commit 6e7137f28c9d743d66b5a1cb8fa0d1717b96f853) + +diff --git a/sysdeps/posix/fpathconf.c b/sysdeps/posix/fpathconf.c +index ec0e780466756e00..e673f2016136679e 100644 +--- a/sysdeps/posix/fpathconf.c ++++ b/sysdeps/posix/fpathconf.c +@@ -131,9 +131,9 @@ __fpathconf (int fd, int name) + #ifdef _POSIX_ASYNC_IO + { + /* AIO is only allowed on regular files and block devices. */ +- struct stat64 st; ++ struct __stat64_t64 st; + +- if (__fstat64 (fd, &st) < 0 ++ if (__fstat64_time64 (fd, &st) < 0 + || (! S_ISREG (st.st_mode) && ! S_ISBLK (st.st_mode))) + return -1; + else diff --git a/SOURCES/glibc-upstream-2.34-263.patch b/SOURCES/glibc-upstream-2.34-263.patch new file mode 100644 index 0000000..b2e79e1 --- /dev/null +++ b/SOURCES/glibc-upstream-2.34-263.patch @@ -0,0 +1,31 @@ +commit 61fd3e0e7495f597b41e90d3e045b8c3b182a23d +Author: Adhemerval Zanella +Date: Tue May 31 12:28:20 2022 -0300 + + socket: Use 64 bit stat for isfdtype (BZ# 29209) + + This is a missing spot initially from 52a5fe70a2c77935. + + Checked on i686-linux-gnu. + + (cherry picked from commit 87f1ec12e79a3895b33801fa816884f0d24ae7ef) + +diff --git a/sysdeps/posix/isfdtype.c b/sysdeps/posix/isfdtype.c +index 06b5386c4379063d..f18bcfef224ebac6 100644 +--- a/sysdeps/posix/isfdtype.c ++++ b/sysdeps/posix/isfdtype.c +@@ -24,12 +24,12 @@ + int + isfdtype (int fildes, int fdtype) + { +- struct stat64 st; ++ struct __stat64_t64 st; + int result; + + { + int save_error = errno; +- result = __fstat64 (fildes, &st); ++ result = __fstat64_time64 (fildes, &st); + __set_errno (save_error); + } + diff --git a/SOURCES/glibc-upstream-2.34-264.patch b/SOURCES/glibc-upstream-2.34-264.patch new file mode 100644 index 0000000..b8e9768 --- /dev/null +++ b/SOURCES/glibc-upstream-2.34-264.patch @@ -0,0 +1,34 @@ +commit 34422108f4e0b8fc0d950b8c00b87193a7884ee5 +Author: Adhemerval Zanella +Date: Tue May 31 12:34:48 2022 -0300 + + inet: Use 64 bit stat for ruserpass (BZ# 29210) + + This is a missing spot initially from 52a5fe70a2c77935. + + Checked on i686-linux-gnu. 
+ + (cherry picked from commit 3cd4785ea02cc3878bf21996cf9b61b3a306447e) + +diff --git a/inet/ruserpass.c b/inet/ruserpass.c +index d61a72877d20b7e5..75e2a065524aa1d5 100644 +--- a/inet/ruserpass.c ++++ b/inet/ruserpass.c +@@ -95,7 +95,7 @@ ruserpass (const char *host, const char **aname, const char **apass) + char *hdir, *buf, *tmp; + char myname[1024], *mydomain; + int t, usedefault = 0; +- struct stat64 stb; ++ struct __stat64_t64 stb; + + hdir = __libc_secure_getenv("HOME"); + if (hdir == NULL) { +@@ -174,7 +174,7 @@ next: + break; + case PASSWD: + if (strcmp(*aname, "anonymous") && +- __fstat64(fileno(cfile), &stb) >= 0 && ++ __fstat64_time64(fileno(cfile), &stb) >= 0 && + (stb.st_mode & 077) != 0) { + warnx(_("Error: .netrc file is readable by others.")); + warnx(_("Remove 'password' line or make file unreadable by others.")); diff --git a/SOURCES/glibc-upstream-2.34-265.patch b/SOURCES/glibc-upstream-2.34-265.patch new file mode 100644 index 0000000..0bd0c81 --- /dev/null +++ b/SOURCES/glibc-upstream-2.34-265.patch @@ -0,0 +1,34 @@ +commit 52431199b5cef8f56c71c66f5859b097804aebe8 +Author: Adhemerval Zanella +Date: Tue May 31 12:38:55 2022 -0300 + + catgets: Use 64 bit stat for __open_catalog (BZ# 29211) + + This is a missing spot initially from 52a5fe70a2c77935. + + Checked on i686-linux-gnu. + + (cherry picked from commit c86631de6fa2fb5fa293810c66e53898537a4ddc) + +diff --git a/catgets/open_catalog.c b/catgets/open_catalog.c +index 7f67cc056445b5e2..75703b2cadd1764c 100644 +--- a/catgets/open_catalog.c ++++ b/catgets/open_catalog.c +@@ -40,7 +40,7 @@ __open_catalog (const char *cat_name, const char *nlspath, const char *env_var, + __nl_catd catalog) + { + int fd = -1; +- struct stat64 st; ++ struct __stat64_t64 st; + int swapping; + size_t cnt; + size_t max_offset; +@@ -194,7 +194,7 @@ __open_catalog (const char *cat_name, const char *nlspath, const char *env_var, + return -1; + } + +- if (__builtin_expect (__fstat64 (fd, &st), 0) < 0) ++ if (__glibc_unlikely (__fstat64_time64 (fd, &st) < 0)) + goto close_unlock_return; + + if (__builtin_expect (!S_ISREG (st.st_mode), 0) diff --git a/SOURCES/glibc-upstream-2.34-266.patch b/SOURCES/glibc-upstream-2.34-266.patch new file mode 100644 index 0000000..7bf98d2 --- /dev/null +++ b/SOURCES/glibc-upstream-2.34-266.patch @@ -0,0 +1,47 @@ +commit b3f935940ebcdf553b64e74fdf65dfd4858821ad +Author: Adhemerval Zanella +Date: Tue May 31 12:51:43 2022 -0300 + + iconv: Use 64 bit stat for gconv_parseconfdir (BZ# 29213) + + The issue is only when used within libc.so (iconvconfig already builds + with _TIME_SIZE=64). + + This is a missing spot initially from 52a5fe70a2c77935. + + Checked on i686-linux-gnu. 
+
+ (cherry picked from commit c789e6e40974e2b67bd33a17f29b20dce6ae8822)
+
+diff --git a/iconv/gconv_parseconfdir.h b/iconv/gconv_parseconfdir.h
+index a586268abc103abd..79398a980cde84e3 100644
+--- a/iconv/gconv_parseconfdir.h
++++ b/iconv/gconv_parseconfdir.h
+@@ -32,8 +32,11 @@
+ # define readdir __readdir
+ # define closedir __closedir
+ # define mempcpy __mempcpy
+-# define lstat64 __lstat64
++# define struct_stat struct __stat64_t64
++# define lstat __lstat64_time64
+ # define feof_unlocked __feof_unlocked
++#else
++# define struct_stat struct stat
+ #endif
+
+ /* Name of the file containing the module information in the directories
+@@ -158,12 +161,12 @@ gconv_parseconfdir (const char *prefix, const char *dir, size_t dir_len)
+ && strcmp (ent->d_name + len - strlen (suffix), suffix) == 0)
+ {
+ char *conf;
+- struct stat64 st;
++ struct_stat st;
+ if (asprintf (&conf, "%s/%s", buf, ent->d_name) < 0)
+ continue;
+
+ if (ent->d_type != DT_UNKNOWN
+- || (lstat64 (conf, &st) != -1 && S_ISREG (st.st_mode)))
++ || (lstat (conf, &st) != -1 && S_ISREG (st.st_mode)))
+ found |= read_conf_file (conf, dir, dir_len);
+
+ free (conf);
diff --git a/SOURCES/glibc-upstream-2.34-267.patch b/SOURCES/glibc-upstream-2.34-267.patch
new file mode 100644
index 0000000..1511b1e
--- /dev/null
+++ b/SOURCES/glibc-upstream-2.34-267.patch
@@ -0,0 +1,21 @@
+commit 9947f2df19a6c5b5706ed3b002199dbb9fef17b1
+Author: Dmitriy Fedchenko
+Date: Mon Jun 6 12:46:14 2022 -0300
+
+ socket: Fix mistyped define statement in socket/sys/socket.h (BZ #29225)
+
+ (cherry picked from commit 999835533bc60fbd0b0b65d2412a6742e5a54b9d)
+
+diff --git a/socket/sys/socket.h b/socket/sys/socket.h
+index bd14e7e3a5008ec5..5765dc1f0dedd380 100644
+--- a/socket/sys/socket.h
++++ b/socket/sys/socket.h
+@@ -181,7 +181,7 @@ extern ssize_t __REDIRECT (sendmsg, (int __fd, const struct msghdr *__message,
+ # else
+ extern ssize_t __sendmsg64 (int __fd, const struct msghdr *__message,
+ int __flags);
+-# defien sendmsg __sendmsg64
++# define sendmsg __sendmsg64
+ # endif
+ #endif
+
diff --git a/SOURCES/glibc-rh2095450.patch b/SOURCES/glibc-upstream-2.34-268.patch
similarity index 91%
rename from SOURCES/glibc-rh2095450.patch
rename to SOURCES/glibc-upstream-2.34-268.patch
index da79ea4..2878763 100644
--- a/SOURCES/glibc-rh2095450.patch
+++ b/SOURCES/glibc-upstream-2.34-268.patch
@@ -1,4 +1,4 @@
-commit 0218463dd8265ed937622f88ac68c7d984fe0cfc
+commit 4c92a1041257c0155c6aa7a182fe5f78e477b0e6
 Author: Matheus Castanho
 Date: Tue Jun 7 10:27:26 2022 -0300
@@ -16,6 +16,7 @@ Date: Tue Jun 7 10:27:26 2022 -0300
 Tested on powerpc, powerpc64 and powerpc64le.
 
 Signed-off-by: Kewen Lin
+ (cherry picked from commit 0218463dd8265ed937622f88ac68c7d984fe0cfc)
 
 diff --git a/sysdeps/powerpc/powerpc64/le/power9/strncpy.S b/sysdeps/powerpc/powerpc64/le/power9/strncpy.S
 index 291941c1e5c0eb4b..5421525acee3ebfe 100644
diff --git a/SOURCES/glibc-upstream-2.34-269.patch b/SOURCES/glibc-upstream-2.34-269.patch
new file mode 100644
index 0000000..35c6ab2
--- /dev/null
+++ b/SOURCES/glibc-upstream-2.34-269.patch
@@ -0,0 +1,126 @@
+commit a7ec6363a3a8fd7a2014fd7398bcdcab42919ec1
+Author: Adhemerval Zanella
+Date: Tue May 31 17:13:35 2022 -0300
+
+ nptl: Fix __libc_cleanup_pop_restore asynchronous restore (BZ#29214)
+
+ This was due to a wrong revert done on 404656009b459658.
+
+ Checked on x86_64-linux-gnu.
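The user-visible effect of the bug: a stdio call could silently switch a thread from asynchronous back to deferred cancellation, because the internal lock cleanup restored the cancellation type even when the caller had not changed it. A minimal sketch of the behaviour the fix restores, using only public pthread calls (the in-tree test tst-cancel30 in the diff below is the authoritative check):

    pthread_setcanceltype (PTHREAD_CANCEL_ASYNCHRONOUS, NULL);
    printf ("...\n");   /* takes and releases an internal stdio lock */
    int old;
    pthread_setcanceltype (PTHREAD_CANCEL_ASYNCHRONOUS, &old);
    /* old must still be PTHREAD_CANCEL_ASYNCHRONOUS; before the fix, the
       cleanup run inside printf reset the type to deferred.  */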
+
+ (cherry picked from commit c7d36dcecc08a29825175f65c4ee873ff3177a23)
+
+diff --git a/nptl/libc-cleanup.c b/nptl/libc-cleanup.c
+index fccb1abe69aa693c..a37c48ff876d613a 100644
+--- a/nptl/libc-cleanup.c
++++ b/nptl/libc-cleanup.c
+@@ -58,7 +58,8 @@ __libc_cleanup_pop_restore (struct _pthread_cleanup_buffer *buffer)
+ THREAD_SETMEM (self, cleanup, buffer->__prev);
+
+ int cancelhandling = atomic_load_relaxed (&self->cancelhandling);
+- if (cancelhandling & CANCELTYPE_BITMASK)
++ if (buffer->__canceltype != PTHREAD_CANCEL_DEFERRED
++ && (cancelhandling & CANCELTYPE_BITMASK) == 0)
+ {
+ int newval;
+ do
+diff --git a/sysdeps/pthread/Makefile b/sysdeps/pthread/Makefile
+index 5147588c130c9415..d99c161c827ef4b8 100644
+--- a/sysdeps/pthread/Makefile
++++ b/sysdeps/pthread/Makefile
+@@ -126,6 +126,7 @@ tests += tst-cnd-basic tst-mtx-trylock tst-cnd-broadcast \
+ tst-pthread-raise-blocked-self \
+ tst-pthread_kill-exited \
+ tst-pthread_kill-exiting \
++ tst-cancel30 \
+ # tests
+
+ tests-time64 := \
+diff --git a/sysdeps/pthread/tst-cancel30.c b/sysdeps/pthread/tst-cancel30.c
+new file mode 100644
+index 0000000000000000..e08392f96874de5f
+--- /dev/null
++++ b/sysdeps/pthread/tst-cancel30.c
+@@ -0,0 +1,82 @@
++/* Check if printf-like functions do not disable asynchronous cancellation
++ mode (BZ#29214).
++
++ Copyright (C) 2022 Free Software Foundation, Inc.
++ This file is part of the GNU C Library.
++
++ The GNU C Library is free software; you can redistribute it and/or
++ modify it under the terms of the GNU Lesser General Public
++ License as published by the Free Software Foundation; either
++ version 2.1 of the License, or (at your option) any later version.
++
++ The GNU C Library is distributed in the hope that it will be useful,
++ but WITHOUT ANY WARRANTY; without even the implied warranty of
++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++ Lesser General Public License for more details.
++
++ You should have received a copy of the GNU Lesser General Public
++ License along with the GNU C Library; if not, see
++ <https://www.gnu.org/licenses/>. */
++
++#include <stdio.h>
++#include <sys/syscall.h>
++#include <unistd.h>
++#include <support/check.h>
++#include <support/xthread.h>
++
++static pthread_barrier_t b;
++
++static void *
++tf (void *arg)
++{
++ int old;
++
++ TEST_COMPARE (pthread_setcanceltype (PTHREAD_CANCEL_ASYNCHRONOUS, NULL), 0);
++
++ TEST_COMPARE (pthread_setcanceltype (PTHREAD_CANCEL_ASYNCHRONOUS, &old), 0);
++ TEST_COMPARE (old, PTHREAD_CANCEL_ASYNCHRONOUS);
++
++ /* Check if internal lock cleanup routines restore the cancellation type
++ correctly. */
++ printf ("...\n");
++ TEST_COMPARE (pthread_setcanceltype (PTHREAD_CANCEL_ASYNCHRONOUS, &old), 0);
++ TEST_COMPARE (old, PTHREAD_CANCEL_ASYNCHRONOUS);
++
++ xpthread_barrier_wait (&b);
++
++ /* Wait indefinitely for cancellation, which only works if asynchronous
++ cancellation is enabled. */
++#ifdef SYS_pause
++ syscall (SYS_pause);
++#elif defined SYS_ppoll || defined SYS_ppoll_time64
++# ifndef SYS_ppoll_time64
++# define SYS_ppoll_time64 SYS_ppoll
++# endif
++ syscall (SYS_ppoll_time64, NULL, 0, NULL, NULL);
++#else
++ for (;;);
++#endif
++
++ return 0;
++}
++
++static int
++do_test (void)
++{
++ xpthread_barrier_init (&b, NULL, 2);
++
++ pthread_t th = xpthread_create (NULL, tf, NULL);
++
++ xpthread_barrier_wait (&b);
++
++ xpthread_cancel (th);
++
++ void *status = xpthread_join (th);
++ TEST_VERIFY (status == PTHREAD_CANCELED);
++
++ return 0;
++}
++
++/* There is no need to wait the full TIMEOUT if asynchronous is not working.
*/
++#define TIMEOUT 3
++#include <support/test-driver.c>
diff --git a/SOURCES/glibc-upstream-2.34-270.patch b/SOURCES/glibc-upstream-2.34-270.patch
new file mode 100644
index 0000000..07ab4c9
--- /dev/null
+++ b/SOURCES/glibc-upstream-2.34-270.patch
@@ -0,0 +1,83 @@
+commit 96944f0f81870b733f518950a108c7ad6b078da6
+Author: Adhemerval Zanella
+Date: Wed May 25 08:58:38 2022 -0300
+
+ hppa: Remove _dl_skip_args usage (BZ# 29165)
+
+ Different than other architectures, hppa creates an unrelated stack
+ frame where ld.so argc/argv adjustments done by ad43cac44a6860eaefc
+ are not done on the argc/argv save/restore by _dl_start_user.
+
+ Instead, load _dl_argc and _dl_argv directly instead of adjusting them
+ using the _dl_skip_args value.
+
+ Checked on hppa-linux-gnu.
+
+ Reviewed-by: Carlos O'Donell
+ (cherry picked from commit 6242602273feb8d68cd51cff0ad21b3c8ee11fc6)
+
+diff --git a/sysdeps/hppa/dl-machine.h b/sysdeps/hppa/dl-machine.h
+index ac66f044189edd18..df6269209f3268b7 100644
+--- a/sysdeps/hppa/dl-machine.h
++++ b/sysdeps/hppa/dl-machine.h
+@@ -374,10 +374,6 @@ asm ( \
+ "_start:\n" \
+ /* The kernel does not give us an initial stack frame. */ \
+ " ldo 64(%sp),%sp\n" \
+- /* Save the relevant arguments (yes, those are the correct \
+- registers, the kernel is weird) in their stack slots. */ \
+-" stw %r25,-40(%sp)\n" /* argc */ \
+-" stw %r24,-44(%sp)\n" /* argv */ \
+ \
+ /* We need the LTP, and we need it now. \
+ $PIC_pcrel$0 points 8 bytes past the current instruction, \
+@@ -435,12 +431,7 @@ asm ( \
+ So, obviously, we can't just pass %sp to _dl_start. That's \
+ okay, argv-4 will do just fine. \
+ \
+- The pleasant part of this is that if we need to skip \
+- arguments we can just decrement argc and move argv, because \
+- the stack pointer is utterly unrelated to the location of \
+- the environment and argument vectors. */ \
+- \
+- /* This is always within range so we'll be okay. */ \
++ This is always within range so we'll be okay. */ \
+ " bl _dl_start,%rp\n" \
+ " ldo -4(%r24),%r26\n" \
+ \
+@@ -450,22 +441,23 @@ asm ( \
+ /* Save the entry point in %r3. */ \
+ " copy %ret0,%r3\n" \
+ \
+- /* See if we were called as a command with the executable file \
+- name as an extra leading argument. */ \
+-" addil LT'_dl_skip_args,%r19\n" \
+-" ldw RT'_dl_skip_args(%r1),%r20\n" \
+-" ldw 0(%r20),%r20\n" \
+- \
+-" ldw -40(%sp),%r25\n" /* argc */ \
+-" comib,= 0,%r20,.Lnofix\n" /* FIXME: Mispredicted branch */\
+-" ldw -44(%sp),%r24\n" /* argv (delay slot) */ \
++ /* The loader adjusts argc, argv, env, and the aux vectors \
++ directly on the stack to remove any arguments used for \
++ direct loader invocation. Thus, argc and argv must be \
++ reloaded from _dl_argc and _dl_argv. */ \
+ \
+-" sub %r25,%r20,%r25\n" \
++ /* Load argc from _dl_argc. */ \
++" addil LT'_dl_argc,%r19\n" \
++" ldw RT'_dl_argc(%r1),%r20\n" \
++" ldw 0(%r20),%r25\n" \
+ " stw %r25,-40(%sp)\n" \
+-" sh2add %r20,%r24,%r24\n" \
++ \
++ /* Same for argv with _dl_argv. */ \
++" addil LT'_dl_argv,%r19\n" \
++" ldw RT'_dl_argv(%r1),%r20\n" \
++" ldw 0(%r20),%r24\n" \
+ " stw %r24,-44(%sp)\n" \
+ \
+-".Lnofix:\n" \
+ /* Call _dl_init(main_map, argc, argv, envp).
*/ \ + " addil LT'_rtld_local,%r19\n" \ + " ldw RT'_rtld_local(%r1),%r26\n" \ diff --git a/SOURCES/glibc-upstream-2.34-271.patch b/SOURCES/glibc-upstream-2.34-271.patch new file mode 100644 index 0000000..d5f6144 --- /dev/null +++ b/SOURCES/glibc-upstream-2.34-271.patch @@ -0,0 +1,84 @@ +commit bb4148283fa7c52fbc7efe19e81cd129adc7fd61 +Author: Adhemerval Zanella +Date: Thu May 26 13:12:21 2022 -0300 + + nios2: Remove _dl_skip_args usage (BZ# 29187) + + Since ad43cac44a the generic code already shuffles the argv/envp/auxv + on the stack to remove the ld.so own arguments and thus _dl_skip_args + is always 0. So there is no need to adjust the argc or argv. + + Checked with qemu-user that arguments are correctly passed on both + constructors and main program. + + Reviewed-by: Carlos O'Donell + (cherry picked from commit 4868ba5d257a7fb415674e79c4ae5a3af2827f55) + +diff --git a/sysdeps/nios2/dl-machine.h b/sysdeps/nios2/dl-machine.h +index 430ca5d7ae1e0372..47b3f6acd7624bcb 100644 +--- a/sysdeps/nios2/dl-machine.h ++++ b/sysdeps/nios2/dl-machine.h +@@ -128,53 +128,23 @@ _start:\n\ + ldw r8, %call(_dl_nios2_get_gp_value)(r22)\n\ + callr r8\n\ + mov gp, r2\n\ +-\n\ +- /* Find the number of arguments to skip. */\n\ +- ldw r8, %got(_dl_skip_args)(r22)\n\ +- ldw r8, 0(r8)\n\ + \n\ + /* Find the main_map from the GOT. */\n\ + ldw r4, %got(_rtld_local)(r22)\n\ + ldw r4, 0(r4)\n\ + \n\ +- /* Find argc. */\n\ +- ldw r5, 0(sp)\n\ +- sub r5, r5, r8\n\ +- stw r5, 0(sp)\n\ +-\n\ +- /* Find the first unskipped argument. */\n\ +- slli r8, r8, 2\n\ +- addi r6, sp, 4\n\ +- add r9, r6, r8\n\ +- mov r10, r6\n\ +-\n\ +- /* Shuffle argv down. */\n\ +-3: ldw r11, 0(r9)\n\ +- stw r11, 0(r10)\n\ +- addi r9, r9, 4\n\ +- addi r10, r10, 4\n\ +- bne r11, zero, 3b\n\ ++ /* Load adjusted argc. */\n\ ++ ldw r2, %got(_dl_argc)(r22)\n\ ++ ldw r5, 0(r2)\n\ + \n\ +- /* Shuffle envp down. */\n\ +- mov r7, r10\n\ +-4: ldw r11, 0(r9)\n\ +- stw r11, 0(r10)\n\ +- addi r9, r9, 4\n\ +- addi r10, r10, 4\n\ +- bne r11, zero, 4b\n\ +-\n\ +- /* Shuffle auxv down. */\n\ +-5: ldw r11, 4(r9)\n\ +- stw r11, 4(r10)\n\ +- ldw r11, 0(r9)\n\ +- stw r11, 0(r10)\n\ +- addi r9, r9, 8\n\ +- addi r10, r10, 8\n\ +- bne r11, zero, 5b\n\ +-\n\ +- /* Update _dl_argv. */\n\ ++ /* Load adjsuted argv. */\n\ + ldw r2, %got(_dl_argv)(r22)\n\ +- stw r6, 0(r2)\n\ ++ ldw r6, 0(r2)\n\ ++\n\ ++ /* envp = argv + argc + 1 */\n\ ++ addi r7, r5, 1\n\ ++ slli r7, r7, 2\n\ ++ add r7, r7, r6\n\ + \n\ + /* Call _dl_init through the PLT. */\n\ + ldw r8, %call(_dl_init)(r22)\n\ diff --git a/SOURCES/glibc-upstream-2.34-272.patch b/SOURCES/glibc-upstream-2.34-272.patch new file mode 100644 index 0000000..289b763 --- /dev/null +++ b/SOURCES/glibc-upstream-2.34-272.patch @@ -0,0 +1,37 @@ +commit 368c5c3e001a37571b61ab342f2b654c3d23643d +Author: Sam James +Date: Sun Jun 5 04:57:09 2022 +0100 + + nss: add assert to DB_LOOKUP_FCT (BZ #28752) + + It's interesting if we have a null action list, + so an assert is worthwhile. + + Suggested-by: DJ Delorie + Signed-off-by: Sam James + Reviewed-by: DJ Delorie + (cherry picked from commit 3fdf0a205b622e40fa7e3c4ed1e4ed4d5c6c5380) + +diff --git a/nss/XXX-lookup.c b/nss/XXX-lookup.c +index dbc87868dd408d9f..343fd9869bd12714 100644 +--- a/nss/XXX-lookup.c ++++ b/nss/XXX-lookup.c +@@ -16,6 +16,7 @@ + License along with the GNU C Library; if not, see + . 
*/ + ++#include + #include "nsswitch.h" + + /*******************************************************************\ +@@ -55,6 +56,10 @@ DB_LOOKUP_FCT (nss_action_list *ni, const char *fct_name, const char *fct2_name, + + *ni = DATABASE_NAME_SYMBOL; + ++ /* We want to know about it if we've somehow got a NULL action list; ++ in the past, we had bad state if seccomp interfered with setup. */ ++ assert(*ni != NULL); ++ + return __nss_lookup (ni, fct_name, fct2_name, fctp); + } + libc_hidden_def (DB_LOOKUP_FCT) diff --git a/SOURCES/glibc-upstream-2.34-273.patch b/SOURCES/glibc-upstream-2.34-273.patch new file mode 100644 index 0000000..698b3e5 --- /dev/null +++ b/SOURCES/glibc-upstream-2.34-273.patch @@ -0,0 +1,74 @@ +commit 94ab2088c37d8e4285354af120b7ed6b887b9e53 +Author: Sam James +Date: Sun Jun 5 04:57:10 2022 +0100 + + nss: handle stat failure in check_reload_and_get (BZ #28752) + + Skip the chroot test if the database isn't loaded + correctly (because the chroot test uses some + existing DB state). + + The __stat64_time64 -> fstatat call can fail if + running under an (aggressive) seccomp filter, + like Firefox seems to use. + + This manifested in a crash when using glib built + with FAM support with such a Firefox build. + + Suggested-by: DJ Delorie + Signed-off-by: Sam James + Reviewed-by: DJ Delorie + (cherry picked from commit ace9e3edbca62d978b1e8f392d8a5d78500272d9) + +diff --git a/nss/nss_database.c b/nss/nss_database.c +index 54561f03287db2e4..e807e9d84ca03680 100644 +--- a/nss/nss_database.c ++++ b/nss/nss_database.c +@@ -420,23 +420,32 @@ nss_database_check_reload_and_get (struct nss_database_state *local, + return true; + } + +- /* Before we reload, verify that "/" hasn't changed. We assume that +- errors here are very unlikely, but the chance that we're entering +- a container is also very unlikely, so we err on the side of both +- very unlikely things not happening at the same time. */ +- if (__stat64_time64 ("/", &str) != 0 +- || (local->root_ino != 0 +- && (str.st_ino != local->root_ino +- || str.st_dev != local->root_dev))) ++ int stat_rv = __stat64_time64 ("/", &str); ++ ++ if (local->data.services[database_index] != NULL) + { +- /* Change detected; disable reloading and return current state. */ +- atomic_store_release (&local->data.reload_disabled, 1); +- *result = local->data.services[database_index]; +- __libc_lock_unlock (local->lock); +- return true; ++ /* Before we reload, verify that "/" hasn't changed. We assume that ++ errors here are very unlikely, but the chance that we're entering ++ a container is also very unlikely, so we err on the side of both ++ very unlikely things not happening at the same time. */ ++ if (stat_rv != 0 ++ || (local->root_ino != 0 ++ && (str.st_ino != local->root_ino ++ || str.st_dev != local->root_dev))) ++ { ++ /* Change detected; disable reloading and return current state. 
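The restructuring in this hunk boils down to one rule: perform the stat once, treat its result as a change signal only when a database is actually loaded, and refresh the cached root identity only when the stat succeeded. A condensed sketch of that pattern, with hypothetical names rather than the glibc internals:

    #include <stdbool.h>
    #include <sys/stat.h>
    #include <sys/types.h>

    struct db_cache
    {
      bool loaded;
      ino_t root_ino;
      dev_t root_dev;
    };

    static bool
    root_changed (struct db_cache *c)
    {
      struct stat st;
      int stat_rv = stat ("/", &st);

      /* Only a loaded database can meaningfully detect a chroot change;
         a failed stat (e.g. under a seccomp filter) must not poison it.  */
      if (c->loaded
          && (stat_rv != 0
              || (c->root_ino != 0
                  && (st.st_ino != c->root_ino || st.st_dev != c->root_dev))))
        return true;                /* Caller disables reloading.  */

      /* Never clobber the cached identity with data from a failed stat.  */
      if (stat_rv == 0)
        {
          c->root_ino = st.st_ino;
          c->root_dev = st.st_dev;
        }
      return false;
    }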
*/ ++ atomic_store_release (&local->data.reload_disabled, 1); ++ *result = local->data.services[database_index]; ++ __libc_lock_unlock (local->lock); ++ return true; ++ } ++ } ++ if (stat_rv == 0) ++ { ++ local->root_ino = str.st_ino; ++ local->root_dev = str.st_dev; + } +- local->root_ino = str.st_ino; +- local->root_dev = str.st_dev; ++ + __libc_lock_unlock (local->lock); + + /* Avoid overwriting the global configuration until we have loaded diff --git a/SOURCES/glibc-upstream-2.34-274.patch b/SOURCES/glibc-upstream-2.34-274.patch new file mode 100644 index 0000000..c56ed93 --- /dev/null +++ b/SOURCES/glibc-upstream-2.34-274.patch @@ -0,0 +1,27 @@ +commit 4b246b2bbd1d5a77035bb990d6097b7337c34bbb +Author: Adhemerval Zanella +Date: Thu Jun 30 09:08:31 2022 -0300 + + linux: Fix mq_timereceive check for 32 bit fallback code (BZ 29304) + + On success, mq_receive() and mq_timedreceive() return the number of + bytes in the received message, so it requires to check if the value + is larger than 0. + + Checked on i686-linux-gnu. + + (cherry picked from commit 71d87d85bf54f6522813aec97c19bdd24997341e) + +diff --git a/sysdeps/unix/sysv/linux/mq_timedreceive.c b/sysdeps/unix/sysv/linux/mq_timedreceive.c +index 7f3a112d7f2cbbe7..1fc98752e7d6d506 100644 +--- a/sysdeps/unix/sysv/linux/mq_timedreceive.c ++++ b/sysdeps/unix/sysv/linux/mq_timedreceive.c +@@ -41,7 +41,7 @@ ___mq_timedreceive_time64 (mqd_t mqdes, char *__restrict msg_ptr, size_t msg_len + { + int r = SYSCALL_CANCEL (mq_timedreceive_time64, mqdes, msg_ptr, msg_len, + msg_prio, abs_timeout); +- if (r == 0 || errno != ENOSYS) ++ if (r >= 0 || errno != ENOSYS) + return r; + __set_errno (EOVERFLOW); + return -1; diff --git a/SOURCES/glibc-upstream-2.34-275.patch b/SOURCES/glibc-upstream-2.34-275.patch new file mode 100644 index 0000000..155d9e1 --- /dev/null +++ b/SOURCES/glibc-upstream-2.34-275.patch @@ -0,0 +1,25 @@ +commit 7789a849234f8b303a571134abe72691ce8c2540 +Author: Adhemerval Zanella +Date: Wed Jul 13 10:37:32 2022 -0300 + + nptl: Fix ___pthread_unregister_cancel_restore asynchronous restore + + This was due a wrong revert done on 404656009b459658. + + Checked on x86_64-linux-gnu and i686-linux-gnu. + + (cherry picked from commit f27e5e21787abc9f719879af47687221aa1027b3) + +diff --git a/nptl/cleanup_defer.c b/nptl/cleanup_defer.c +index 35ba40fb0247c7cc..59571229d8ccf481 100644 +--- a/nptl/cleanup_defer.c ++++ b/nptl/cleanup_defer.c +@@ -72,7 +72,7 @@ ___pthread_unregister_cancel_restore (__pthread_unwind_buf_t *buf) + return; + + int cancelhandling = atomic_load_relaxed (&self->cancelhandling); +- if (cancelhandling & CANCELTYPE_BITMASK) ++ if ((cancelhandling & CANCELTYPE_BITMASK) == 0) + { + int newval; + do diff --git a/SOURCES/glibc-upstream-2.34-276.patch b/SOURCES/glibc-upstream-2.34-276.patch new file mode 100644 index 0000000..5e9bb63 --- /dev/null +++ b/SOURCES/glibc-upstream-2.34-276.patch @@ -0,0 +1,29 @@ +commit 8d324019e69203f5998f223d0e905de1395330ea +Author: Sunil K Pandey +Date: Mon Jul 18 18:38:48 2022 -0700 + + x86_64: Remove end of line trailing spaces + + This commit remove trailing space introduced by following commit. 
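Returning to the mq_timedreceive fix a few hunks above: it hinges on the return convention that, on success, mq_receive and mq_timedreceive return the received message length, which may be any value greater than or equal to zero. A hedged sketch of the corrected fallback logic; the wrapper name is invented and this is not the actual glibc source:

    #include <errno.h>
    #include <mqueue.h>
    #include <sys/types.h>
    #include <time.h>

    static ssize_t
    timedreceive_with_fallback (mqd_t q, char *buf, size_t len,
                                unsigned int *prio,
                                const struct timespec *abstime)
    {
      ssize_t r = mq_timedreceive (q, buf, len, prio, abstime);
      /* Success is r >= 0; r == 0 only means an empty message, so testing
         r == 0 would wrongly send most successful receives down the errno
         path, as the pre-fix code did.  */
      if (r >= 0 || errno != ENOSYS)
        return r;
      errno = EOVERFLOW;
      return -1;
    }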
+ + commit a775a7a3eb1e85b54af0b4ee5ff4dcf66772a1fb + Author: Noah Goldstein + Date: Wed Jun 23 01:56:29 2021 -0400 + + x86: Fix overflow bug in wcsnlen-sse4_1 and wcsnlen-avx2 [BZ #27974] + +diff --git a/sysdeps/x86_64/multiarch/strlen-vec.S b/sysdeps/x86_64/multiarch/strlen-vec.S +index b7657282bd2e6fa5..031753a91763b351 100644 +--- a/sysdeps/x86_64/multiarch/strlen-vec.S ++++ b/sysdeps/x86_64/multiarch/strlen-vec.S +@@ -66,8 +66,8 @@ ENTRY(strlen) + L(n_nonzero): + # ifdef AS_WCSLEN + /* Check for overflow from maxlen * sizeof(wchar_t). If it would +- overflow the only way this program doesn't have undefined behavior +- is if there is a null terminator in valid memory so wcslen will ++ overflow the only way this program doesn't have undefined behavior ++ is if there is a null terminator in valid memory so wcslen will + suffice. */ + mov %RSI_LP, %R10_LP + sar $62, %R10_LP diff --git a/SOURCES/glibc-upstream-2.34-277.patch b/SOURCES/glibc-upstream-2.34-277.patch new file mode 100644 index 0000000..3f15409 --- /dev/null +++ b/SOURCES/glibc-upstream-2.34-277.patch @@ -0,0 +1,457 @@ +commit eb9aa96facc5231e208de0946870f10c21aee9f3 +Author: Adhemerval Zanella +Date: Fri May 13 09:33:30 2022 -0300 + + x86_64: Remove bzero optimization + + Both symbols are marked as legacy in POSIX.1-2001 and removed on + POSIX.1-2008, although the prototypes are defined for _GNU_SOURCE + or _DEFAULT_SOURCE. + + GCC also replaces bcopy with a memmove and bzero with memset on default + configuration (to actually get a bzero libc call the code requires + to omit string.h inclusion and built with -fno-builtin), so it is + highly unlikely programs are actually calling libc bzero symbol. + + On a recent Linux distro (Ubuntu 22.04), there is no bzero calls + by the installed binaries. + + $ cat count_bstring.sh + #!/bin/bash + + files=`IFS=':';for i in $PATH; do test -d "$i" && find "$i" -maxdepth 1 -executable -type f; done` + total=0 + for file in $files; do + symbols=`objdump -R $file 2>&1` + if [ $? -eq 0 ]; then + ncalls=`echo $symbols | grep -w $1 | wc -l` + ((total=total+ncalls)) + if [ $ncalls -gt 0 ]; then + echo "$file: $ncalls" + fi + fi + done + echo "TOTAL=$total" + $ ./count_bstring.sh bzero + TOTAL=0 + + Checked on x86_64-linux-gnu. + + (cherry picked from commit 9403b71ae97e3f1a91c796ddcbb4e6f044434734) + +diff --git a/sysdeps/x86_64/bzero.S b/sysdeps/x86_64/bzero.S +deleted file mode 100644 +index f96d567fd87696af..0000000000000000 +--- a/sysdeps/x86_64/bzero.S ++++ /dev/null +@@ -1 +0,0 @@ +-/* Implemented in memset.S. */ +diff --git a/sysdeps/x86_64/memset.S b/sysdeps/x86_64/memset.S +index 0358210c7ff3a976..2b64741fd10a8ec2 100644 +--- a/sysdeps/x86_64/memset.S ++++ b/sysdeps/x86_64/memset.S +@@ -1,4 +1,4 @@ +-/* memset/bzero -- set memory area to CH/0 ++/* memset -- set memory area to CH/0 + Optimized version for x86-64. + Copyright (C) 2002-2021 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
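For context on the bzero removal above: the entire observable contract of bzero is the one-line rewrite below, which GCC already applies by default, so the exported symbol sees no callers. A sketch, not glibc source:

    #include <string.h>

    /* bzero (s, n) is, by definition, memset (s, 0, n).  */
    static inline void
    bzero_equiv (void *s, size_t n)
    {
      memset (s, 0, n);
    }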
+@@ -35,9 +35,6 @@ + punpcklwd %xmm0, %xmm0; \ + pshufd $0, %xmm0, %xmm0 + +-# define BZERO_ZERO_VEC0() \ +- pxor %xmm0, %xmm0 +- + # define WMEMSET_SET_VEC0_AND_SET_RETURN(d, r) \ + movd d, %xmm0; \ + pshufd $0, %xmm0, %xmm0; \ +@@ -56,10 +53,6 @@ + # define MEMSET_SYMBOL(p,s) memset + #endif + +-#ifndef BZERO_SYMBOL +-# define BZERO_SYMBOL(p,s) __bzero +-#endif +- + #ifndef WMEMSET_SYMBOL + # define WMEMSET_CHK_SYMBOL(p,s) p + # define WMEMSET_SYMBOL(p,s) __wmemset +@@ -70,7 +63,6 @@ + libc_hidden_builtin_def (memset) + + #if IS_IN (libc) +-weak_alias (__bzero, bzero) + libc_hidden_def (__wmemset) + weak_alias (__wmemset, wmemset) + libc_hidden_weak (wmemset) +diff --git a/sysdeps/x86_64/multiarch/Makefile b/sysdeps/x86_64/multiarch/Makefile +index b503e4b81e92a11c..67401162d526f664 100644 +--- a/sysdeps/x86_64/multiarch/Makefile ++++ b/sysdeps/x86_64/multiarch/Makefile +@@ -1,7 +1,6 @@ + ifeq ($(subdir),string) + + sysdep_routines += \ +- bzero \ + memchr-avx2 \ + memchr-avx2-rtm \ + memchr-evex \ +diff --git a/sysdeps/x86_64/multiarch/bzero.c b/sysdeps/x86_64/multiarch/bzero.c +deleted file mode 100644 +index 13e399a9a1fbdeb2..0000000000000000 +--- a/sysdeps/x86_64/multiarch/bzero.c ++++ /dev/null +@@ -1,108 +0,0 @@ +-/* Multiple versions of bzero. +- All versions must be listed in ifunc-impl-list.c. +- Copyright (C) 2022 Free Software Foundation, Inc. +- This file is part of the GNU C Library. +- +- The GNU C Library is free software; you can redistribute it and/or +- modify it under the terms of the GNU Lesser General Public +- License as published by the Free Software Foundation; either +- version 2.1 of the License, or (at your option) any later version. +- +- The GNU C Library is distributed in the hope that it will be useful, +- but WITHOUT ANY WARRANTY; without even the implied warranty of +- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +- Lesser General Public License for more details. +- +- You should have received a copy of the GNU Lesser General Public +- License along with the GNU C Library; if not, see +- . */ +- +-/* Define multiple versions only for the definition in libc. */ +-#if IS_IN (libc) +-# define __bzero __redirect___bzero +-# include +-# undef __bzero +- +-/* OPTIMIZE1 definition required for bzero patch. 
*/ +-# define OPTIMIZE1(name) EVALUATOR1 (SYMBOL_NAME, name) +-# define SYMBOL_NAME __bzero +-# include +- +-extern __typeof (REDIRECT_NAME) OPTIMIZE1 (sse2_unaligned) +- attribute_hidden; +-extern __typeof (REDIRECT_NAME) OPTIMIZE1 (sse2_unaligned_erms) +- attribute_hidden; +-extern __typeof (REDIRECT_NAME) OPTIMIZE1 (avx2_unaligned) attribute_hidden; +-extern __typeof (REDIRECT_NAME) OPTIMIZE1 (avx2_unaligned_erms) +- attribute_hidden; +-extern __typeof (REDIRECT_NAME) OPTIMIZE1 (avx2_unaligned_rtm) +- attribute_hidden; +-extern __typeof (REDIRECT_NAME) OPTIMIZE1 (avx2_unaligned_erms_rtm) +- attribute_hidden; +-extern __typeof (REDIRECT_NAME) OPTIMIZE1 (evex_unaligned) +- attribute_hidden; +-extern __typeof (REDIRECT_NAME) OPTIMIZE1 (evex_unaligned_erms) +- attribute_hidden; +-extern __typeof (REDIRECT_NAME) OPTIMIZE1 (avx512_unaligned) +- attribute_hidden; +-extern __typeof (REDIRECT_NAME) OPTIMIZE1 (avx512_unaligned_erms) +- attribute_hidden; +- +-static inline void * +-IFUNC_SELECTOR (void) +-{ +- const struct cpu_features* cpu_features = __get_cpu_features (); +- +- if (CPU_FEATURE_USABLE_P (cpu_features, AVX512F) +- && !CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_AVX512)) +- { +- if (CPU_FEATURE_USABLE_P (cpu_features, AVX512VL) +- && CPU_FEATURE_USABLE_P (cpu_features, AVX512BW) +- && CPU_FEATURE_USABLE_P (cpu_features, BMI2)) +- { +- if (CPU_FEATURE_USABLE_P (cpu_features, ERMS)) +- return OPTIMIZE1 (avx512_unaligned_erms); +- +- return OPTIMIZE1 (avx512_unaligned); +- } +- } +- +- if (CPU_FEATURE_USABLE_P (cpu_features, AVX2)) +- { +- if (CPU_FEATURE_USABLE_P (cpu_features, AVX512VL) +- && CPU_FEATURE_USABLE_P (cpu_features, AVX512BW) +- && CPU_FEATURE_USABLE_P (cpu_features, BMI2)) +- { +- if (CPU_FEATURE_USABLE_P (cpu_features, ERMS)) +- return OPTIMIZE1 (evex_unaligned_erms); +- +- return OPTIMIZE1 (evex_unaligned); +- } +- +- if (CPU_FEATURE_USABLE_P (cpu_features, RTM)) +- { +- if (CPU_FEATURE_USABLE_P (cpu_features, ERMS)) +- return OPTIMIZE1 (avx2_unaligned_erms_rtm); +- +- return OPTIMIZE1 (avx2_unaligned_rtm); +- } +- +- if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER)) +- { +- if (CPU_FEATURE_USABLE_P (cpu_features, ERMS)) +- return OPTIMIZE1 (avx2_unaligned_erms); +- +- return OPTIMIZE1 (avx2_unaligned); +- } +- } +- +- if (CPU_FEATURE_USABLE_P (cpu_features, ERMS)) +- return OPTIMIZE1 (sse2_unaligned_erms); +- +- return OPTIMIZE1 (sse2_unaligned); +-} +- +-libc_ifunc_redirected (__redirect___bzero, __bzero, IFUNC_SELECTOR ()); +- +-weak_alias (__bzero, bzero) +-#endif +diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c +index e5e48b36c3175e68..d990a7149489efd9 100644 +--- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c ++++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c +@@ -280,48 +280,6 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, + __memset_avx512_no_vzeroupper) + ) + +- /* Support sysdeps/x86_64/multiarch/bzero.c. 
*/ +- IFUNC_IMPL (i, name, bzero, +- IFUNC_IMPL_ADD (array, i, bzero, 1, +- __bzero_sse2_unaligned) +- IFUNC_IMPL_ADD (array, i, bzero, 1, +- __bzero_sse2_unaligned_erms) +- IFUNC_IMPL_ADD (array, i, bzero, +- CPU_FEATURE_USABLE (AVX2), +- __bzero_avx2_unaligned) +- IFUNC_IMPL_ADD (array, i, bzero, +- CPU_FEATURE_USABLE (AVX2), +- __bzero_avx2_unaligned_erms) +- IFUNC_IMPL_ADD (array, i, bzero, +- (CPU_FEATURE_USABLE (AVX2) +- && CPU_FEATURE_USABLE (RTM)), +- __bzero_avx2_unaligned_rtm) +- IFUNC_IMPL_ADD (array, i, bzero, +- (CPU_FEATURE_USABLE (AVX2) +- && CPU_FEATURE_USABLE (RTM)), +- __bzero_avx2_unaligned_erms_rtm) +- IFUNC_IMPL_ADD (array, i, bzero, +- (CPU_FEATURE_USABLE (AVX512VL) +- && CPU_FEATURE_USABLE (AVX512BW) +- && CPU_FEATURE_USABLE (BMI2)), +- __bzero_evex_unaligned) +- IFUNC_IMPL_ADD (array, i, bzero, +- (CPU_FEATURE_USABLE (AVX512VL) +- && CPU_FEATURE_USABLE (AVX512BW) +- && CPU_FEATURE_USABLE (BMI2)), +- __bzero_evex_unaligned_erms) +- IFUNC_IMPL_ADD (array, i, bzero, +- (CPU_FEATURE_USABLE (AVX512VL) +- && CPU_FEATURE_USABLE (AVX512BW) +- && CPU_FEATURE_USABLE (BMI2)), +- __bzero_avx512_unaligned_erms) +- IFUNC_IMPL_ADD (array, i, bzero, +- (CPU_FEATURE_USABLE (AVX512VL) +- && CPU_FEATURE_USABLE (AVX512BW) +- && CPU_FEATURE_USABLE (BMI2)), +- __bzero_avx512_unaligned) +- ) +- + /* Support sysdeps/x86_64/multiarch/rawmemchr.c. */ + IFUNC_IMPL (i, name, rawmemchr, + IFUNC_IMPL_ADD (array, i, rawmemchr, +diff --git a/sysdeps/x86_64/multiarch/memset-avx2-unaligned-erms-rtm.S b/sysdeps/x86_64/multiarch/memset-avx2-unaligned-erms-rtm.S +index 5a5ee6f67299400b..8ac3e479bba488be 100644 +--- a/sysdeps/x86_64/multiarch/memset-avx2-unaligned-erms-rtm.S ++++ b/sysdeps/x86_64/multiarch/memset-avx2-unaligned-erms-rtm.S +@@ -5,7 +5,6 @@ + + #define SECTION(p) p##.avx.rtm + #define MEMSET_SYMBOL(p,s) p##_avx2_##s##_rtm +-#define BZERO_SYMBOL(p,s) p##_avx2_##s##_rtm + #define WMEMSET_SYMBOL(p,s) p##_avx2_##s##_rtm + + #include "memset-avx2-unaligned-erms.S" +diff --git a/sysdeps/x86_64/multiarch/memset-avx2-unaligned-erms.S b/sysdeps/x86_64/multiarch/memset-avx2-unaligned-erms.S +index a093a2831f3dfa0d..c0bf2875d03d51ab 100644 +--- a/sysdeps/x86_64/multiarch/memset-avx2-unaligned-erms.S ++++ b/sysdeps/x86_64/multiarch/memset-avx2-unaligned-erms.S +@@ -14,9 +14,6 @@ + vmovd d, %xmm0; \ + movq r, %rax; + +-# define BZERO_ZERO_VEC0() \ +- vpxor %xmm0, %xmm0, %xmm0 +- + # define WMEMSET_SET_VEC0_AND_SET_RETURN(d, r) \ + MEMSET_SET_VEC0_AND_SET_RETURN(d, r) + +@@ -32,9 +29,6 @@ + # ifndef MEMSET_SYMBOL + # define MEMSET_SYMBOL(p,s) p##_avx2_##s + # endif +-# ifndef BZERO_SYMBOL +-# define BZERO_SYMBOL(p,s) p##_avx2_##s +-# endif + # ifndef WMEMSET_SYMBOL + # define WMEMSET_SYMBOL(p,s) p##_avx2_##s + # endif +diff --git a/sysdeps/x86_64/multiarch/memset-avx512-unaligned-erms.S b/sysdeps/x86_64/multiarch/memset-avx512-unaligned-erms.S +index 727c92133a15900f..5241216a77bf72b7 100644 +--- a/sysdeps/x86_64/multiarch/memset-avx512-unaligned-erms.S ++++ b/sysdeps/x86_64/multiarch/memset-avx512-unaligned-erms.S +@@ -19,9 +19,6 @@ + vpbroadcastb d, %VEC0; \ + movq r, %rax + +-# define BZERO_ZERO_VEC0() \ +- vpxorq %XMM0, %XMM0, %XMM0 +- + # define WMEMSET_SET_VEC0_AND_SET_RETURN(d, r) \ + vpbroadcastd d, %VEC0; \ + movq r, %rax +diff --git a/sysdeps/x86_64/multiarch/memset-evex-unaligned-erms.S b/sysdeps/x86_64/multiarch/memset-evex-unaligned-erms.S +index 5d8fa78f05476b10..637002150659123c 100644 +--- a/sysdeps/x86_64/multiarch/memset-evex-unaligned-erms.S ++++ 
b/sysdeps/x86_64/multiarch/memset-evex-unaligned-erms.S +@@ -19,9 +19,6 @@ + vpbroadcastb d, %VEC0; \ + movq r, %rax + +-# define BZERO_ZERO_VEC0() \ +- vpxorq %XMM0, %XMM0, %XMM0 +- + # define WMEMSET_SET_VEC0_AND_SET_RETURN(d, r) \ + vpbroadcastd d, %VEC0; \ + movq r, %rax +diff --git a/sysdeps/x86_64/multiarch/memset-sse2-unaligned-erms.S b/sysdeps/x86_64/multiarch/memset-sse2-unaligned-erms.S +index 2951f7f5f70e274a..c47f3a9c955508a2 100644 +--- a/sysdeps/x86_64/multiarch/memset-sse2-unaligned-erms.S ++++ b/sysdeps/x86_64/multiarch/memset-sse2-unaligned-erms.S +@@ -22,7 +22,6 @@ + + #if IS_IN (libc) + # define MEMSET_SYMBOL(p,s) p##_sse2_##s +-# define BZERO_SYMBOL(p,s) MEMSET_SYMBOL (p, s) + # define WMEMSET_SYMBOL(p,s) p##_sse2_##s + + # ifdef SHARED +diff --git a/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S b/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S +index d9c577fb5ff9700f..abc12d9cda1b3843 100644 +--- a/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S ++++ b/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S +@@ -1,5 +1,5 @@ +-/* memset/bzero with unaligned store and rep stosb +- Copyright (C) 2016-2021 Free Software Foundation, Inc. ++/* memset with unaligned store and rep stosb ++ Copyright (C) 2016-2022 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or +@@ -26,10 +26,6 @@ + + #include + +-#ifndef BZERO_SYMBOL +-# define BZERO_SYMBOL(p,s) MEMSET_SYMBOL (p, s) +-#endif +- + #ifndef MEMSET_CHK_SYMBOL + # define MEMSET_CHK_SYMBOL(p,s) MEMSET_SYMBOL(p, s) + #endif +@@ -134,31 +130,6 @@ ENTRY (WMEMSET_SYMBOL (__wmemset, unaligned)) + END (WMEMSET_SYMBOL (__wmemset, unaligned)) + #endif + +-ENTRY (BZERO_SYMBOL(__bzero, unaligned)) +-#if VEC_SIZE > 16 +- BZERO_ZERO_VEC0 () +-#endif +- mov %RDI_LP, %RAX_LP +- mov %RSI_LP, %RDX_LP +-#ifndef USE_LESS_VEC_MASK_STORE +- xorl %esi, %esi +-#endif +- cmp $VEC_SIZE, %RDX_LP +- jb L(less_vec_no_vdup) +-#ifdef USE_LESS_VEC_MASK_STORE +- xorl %esi, %esi +-#endif +-#if VEC_SIZE <= 16 +- BZERO_ZERO_VEC0 () +-#endif +- cmp $(VEC_SIZE * 2), %RDX_LP +- ja L(more_2x_vec) +- /* From VEC and to 2 * VEC. No branch when size == VEC_SIZE. */ +- VMOVU %VEC(0), (%rdi) +- VMOVU %VEC(0), (VEC_SIZE * -1)(%rdi, %rdx) +- VZEROUPPER_RETURN +-END (BZERO_SYMBOL(__bzero, unaligned)) +- + #if defined SHARED && IS_IN (libc) + ENTRY_CHK (MEMSET_CHK_SYMBOL (__memset_chk, unaligned)) + cmp %RDX_LP, %RCX_LP +@@ -216,31 +187,6 @@ END (__memset_erms) + END (MEMSET_SYMBOL (__memset, erms)) + # endif + +-ENTRY_P2ALIGN (BZERO_SYMBOL(__bzero, unaligned_erms), 6) +-# if VEC_SIZE > 16 +- BZERO_ZERO_VEC0 () +-# endif +- mov %RDI_LP, %RAX_LP +- mov %RSI_LP, %RDX_LP +-# ifndef USE_LESS_VEC_MASK_STORE +- xorl %esi, %esi +-# endif +- cmp $VEC_SIZE, %RDX_LP +- jb L(less_vec_no_vdup) +-# ifdef USE_LESS_VEC_MASK_STORE +- xorl %esi, %esi +-# endif +-# if VEC_SIZE <= 16 +- BZERO_ZERO_VEC0 () +-# endif +- cmp $(VEC_SIZE * 2), %RDX_LP +- ja L(stosb_more_2x_vec) +- /* From VEC and to 2 * VEC. No branch when size == VEC_SIZE. */ +- VMOVU %VEC(0), (%rdi) +- VMOVU %VEC(0), (VEC_SIZE * -1)(%rdi, %rdx) +- VZEROUPPER_RETURN +-END (BZERO_SYMBOL(__bzero, unaligned_erms)) +- + # if defined SHARED && IS_IN (libc) + ENTRY_CHK (MEMSET_CHK_SYMBOL (__memset_chk, unaligned_erms)) + cmp %RDX_LP, %RCX_LP +@@ -282,7 +228,6 @@ L(last_2x_vec): + #ifdef USE_LESS_VEC_MASK_STORE + .p2align 4,, 10 + L(less_vec): +-L(less_vec_no_vdup): + L(less_vec_from_wmemset): + /* Less than 1 VEC. 
*/ + # if VEC_SIZE != 16 && VEC_SIZE != 32 && VEC_SIZE != 64 +@@ -430,9 +375,6 @@ L(less_vec): + xmm). This is only does anything for AVX2. */ + MEMSET_VDUP_TO_VEC0_LOW () + L(less_vec_from_wmemset): +-#if VEC_SIZE > 16 +-L(less_vec_no_vdup): +-#endif + #endif + L(cross_page): + #if VEC_SIZE > 32 +@@ -445,9 +387,6 @@ L(cross_page): + #endif + #ifndef USE_XMM_LESS_VEC + MOVQ %XMM0, %SET_REG64 +-#endif +-#if VEC_SIZE <= 16 +-L(less_vec_no_vdup): + #endif + cmpl $8, %edx + jge L(between_8_15) diff --git a/SOURCES/glibc-upstream-2.34-278.patch b/SOURCES/glibc-upstream-2.34-278.patch new file mode 100644 index 0000000..26b047f --- /dev/null +++ b/SOURCES/glibc-upstream-2.34-278.patch @@ -0,0 +1,460 @@ +commit 8ab861d295b90177b89288a2bc95c5de5e4e5bc6 +Author: Sunil K Pandey +Date: Sun Feb 27 16:39:47 2022 -0800 + + x86_64: Implement evex512 version of strlen, strnlen, wcslen and wcsnlen + + This patch implements following evex512 version of string functions. + Perf gain for evex512 version is up to 50% as compared to evex, + depending on length and alignment. + + Placeholder function, not used by any processor at the moment. + + - String length function using 512 bit vectors. + - String N length using 512 bit vectors. + - Wide string length using 512 bit vectors. + - Wide string N length using 512 bit vectors. + + Reviewed-by: Noah Goldstein + (cherry picked from commit 9c66efb86fe384f77435f7e326333fb2e4e10676) + +diff --git a/sysdeps/x86_64/multiarch/Makefile b/sysdeps/x86_64/multiarch/Makefile +index 67401162d526f664..4d4ad2a3686b5bc3 100644 +--- a/sysdeps/x86_64/multiarch/Makefile ++++ b/sysdeps/x86_64/multiarch/Makefile +@@ -87,6 +87,7 @@ sysdep_routines += \ + strlen-avx2 \ + strlen-avx2-rtm \ + strlen-evex \ ++ strlen-evex512 \ + strlen-sse2 \ + strncase_l-avx2 \ + strncase_l-avx2-rtm \ +@@ -115,6 +116,7 @@ sysdep_routines += \ + strnlen-avx2 \ + strnlen-avx2-rtm \ + strnlen-evex \ ++ strnlen-evex512 \ + strnlen-sse2 \ + strpbrk-c \ + strpbrk-sse2 \ +@@ -148,6 +150,7 @@ sysdep_routines += \ + wcslen-avx2 \ + wcslen-avx2-rtm \ + wcslen-evex \ ++ wcslen-evex512 \ + wcslen-sse2 \ + wcslen-sse4_1 \ + wcsncmp-avx2 \ +@@ -158,6 +161,7 @@ sysdep_routines += \ + wcsnlen-avx2-rtm \ + wcsnlen-c \ + wcsnlen-evex \ ++ wcsnlen-evex512 \ + wcsnlen-sse4_1 \ + wcsrchr-avx2 \ + wcsrchr-avx2-rtm \ +diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c +index d990a7149489efd9..6b75a7106e174bce 100644 +--- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c ++++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c +@@ -317,6 +317,11 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, + && CPU_FEATURE_USABLE (AVX512BW) + && CPU_FEATURE_USABLE (BMI2)), + __strlen_evex) ++ IFUNC_IMPL_ADD (array, i, strlen, ++ (CPU_FEATURE_USABLE (AVX512VL) ++ && CPU_FEATURE_USABLE (AVX512BW) ++ && CPU_FEATURE_USABLE (BMI2)), ++ __strlen_evex512) + IFUNC_IMPL_ADD (array, i, strlen, 1, __strlen_sse2)) + + /* Support sysdeps/x86_64/multiarch/strnlen.c. */ +@@ -335,6 +340,11 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, + && CPU_FEATURE_USABLE (AVX512BW) + && CPU_FEATURE_USABLE (BMI2)), + __strnlen_evex) ++ IFUNC_IMPL_ADD (array, i, strnlen, ++ (CPU_FEATURE_USABLE (AVX512VL) ++ && CPU_FEATURE_USABLE (AVX512BW) ++ && CPU_FEATURE_USABLE (BMI2)), ++ __strnlen_evex512) + IFUNC_IMPL_ADD (array, i, strnlen, 1, __strnlen_sse2)) + + /* Support sysdeps/x86_64/multiarch/stpncpy.c. 
*/ +@@ -714,6 +724,11 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, + && CPU_FEATURE_USABLE (AVX512BW) + && CPU_FEATURE_USABLE (BMI2)), + __wcslen_evex) ++ IFUNC_IMPL_ADD (array, i, wcslen, ++ (CPU_FEATURE_USABLE (AVX512VL) ++ && CPU_FEATURE_USABLE (AVX512BW) ++ && CPU_FEATURE_USABLE (BMI2)), ++ __wcslen_evex512) + IFUNC_IMPL_ADD (array, i, wcslen, + CPU_FEATURE_USABLE (SSE4_1), + __wcslen_sse4_1) +@@ -735,6 +750,11 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, + && CPU_FEATURE_USABLE (AVX512BW) + && CPU_FEATURE_USABLE (BMI2)), + __wcsnlen_evex) ++ IFUNC_IMPL_ADD (array, i, wcsnlen, ++ (CPU_FEATURE_USABLE (AVX512VL) ++ && CPU_FEATURE_USABLE (AVX512BW) ++ && CPU_FEATURE_USABLE (BMI2)), ++ __wcsnlen_evex512) + IFUNC_IMPL_ADD (array, i, wcsnlen, + CPU_FEATURE_USABLE (SSE4_1), + __wcsnlen_sse4_1) +diff --git a/sysdeps/x86_64/multiarch/strlen-evex-base.S b/sysdeps/x86_64/multiarch/strlen-evex-base.S +new file mode 100644 +index 0000000000000000..278c899691d89ba7 +--- /dev/null ++++ b/sysdeps/x86_64/multiarch/strlen-evex-base.S +@@ -0,0 +1,302 @@ ++/* Placeholder function, not used by any processor at the moment. ++ Copyright (C) 2022 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#if IS_IN (libc) ++ ++# include ++ ++# ifdef USE_AS_WCSLEN ++# define VPCMP vpcmpd ++# define VPTESTN vptestnmd ++# define VPMINU vpminud ++# define CHAR_SIZE 4 ++# else ++# define VPCMP vpcmpb ++# define VPTESTN vptestnmb ++# define VPMINU vpminub ++# define CHAR_SIZE 1 ++# endif ++ ++# define XMM0 xmm16 ++# define PAGE_SIZE 4096 ++# define CHAR_PER_VEC (VEC_SIZE / CHAR_SIZE) ++ ++# if VEC_SIZE == 64 ++# define KMOV kmovq ++# define KORTEST kortestq ++# define RAX rax ++# define RCX rcx ++# define RDX rdx ++# define SHR shrq ++# define TEXTSUFFIX evex512 ++# define VMM0 zmm16 ++# define VMM1 zmm17 ++# define VMM2 zmm18 ++# define VMM3 zmm19 ++# define VMM4 zmm20 ++# define VMOVA vmovdqa64 ++# elif VEC_SIZE == 32 ++/* Currently Unused. */ ++# define KMOV kmovd ++# define KORTEST kortestd ++# define RAX eax ++# define RCX ecx ++# define RDX edx ++# define SHR shrl ++# define TEXTSUFFIX evex256 ++# define VMM0 ymm16 ++# define VMM1 ymm17 ++# define VMM2 ymm18 ++# define VMM3 ymm19 ++# define VMM4 ymm20 ++# define VMOVA vmovdqa32 ++# endif ++ ++ .section .text.TEXTSUFFIX, "ax", @progbits ++/* Aligning entry point to 64 byte, provides better performance for ++ one vector length string. */ ++ENTRY_P2ALIGN (STRLEN, 6) ++# ifdef USE_AS_STRNLEN ++ /* Check zero length. */ ++ test %RSI_LP, %RSI_LP ++ jz L(ret_max) ++# ifdef __ILP32__ ++ /* Clear the upper 32 bits. 
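(A 32-bit mov with the same source and destination register zero-extends into the full 64-bit register, so under __ILP32__ this single instruction discards any stale bits in the upper half of the maxlen argument.)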
*/ ++ movl %esi, %esi ++# endif ++# endif ++ ++ movl %edi, %eax ++ vpxorq %XMM0, %XMM0, %XMM0 ++ andl $(PAGE_SIZE - 1), %eax ++ cmpl $(PAGE_SIZE - VEC_SIZE), %eax ++ ja L(page_cross) ++ ++ /* Compare [w]char for null, mask bit will be set for match. */ ++ VPCMP $0, (%rdi), %VMM0, %k0 ++ KMOV %k0, %RAX ++ test %RAX, %RAX ++ jz L(align_more) ++ ++ bsf %RAX, %RAX ++# ifdef USE_AS_STRNLEN ++ cmpq %rsi, %rax ++ cmovnb %rsi, %rax ++# endif ++ ret ++ ++ /* At this point vector max length reached. */ ++# ifdef USE_AS_STRNLEN ++ .p2align 4,,3 ++L(ret_max): ++ movq %rsi, %rax ++ ret ++# endif ++ ++L(align_more): ++ leaq VEC_SIZE(%rdi), %rax ++ /* Align rax to VEC_SIZE. */ ++ andq $-VEC_SIZE, %rax ++# ifdef USE_AS_STRNLEN ++ movq %rax, %rdx ++ subq %rdi, %rdx ++# ifdef USE_AS_WCSLEN ++ SHR $2, %RDX ++# endif ++ /* At this point rdx contains [w]chars already compared. */ ++ subq %rsi, %rdx ++ jae L(ret_max) ++ negq %rdx ++ /* At this point rdx contains number of w[char] needs to go. ++ Now onwards rdx will keep decrementing with each compare. */ ++# endif ++ ++ /* Loop unroll 4 times for 4 vector loop. */ ++ VPCMP $0, (%rax), %VMM0, %k0 ++ KMOV %k0, %RCX ++ test %RCX, %RCX ++ jnz L(ret_vec_x1) ++ ++# ifdef USE_AS_STRNLEN ++ subq $CHAR_PER_VEC, %rdx ++ jbe L(ret_max) ++# endif ++ ++ VPCMP $0, VEC_SIZE(%rax), %VMM0, %k0 ++ KMOV %k0, %RCX ++ test %RCX, %RCX ++ jnz L(ret_vec_x2) ++ ++# ifdef USE_AS_STRNLEN ++ subq $CHAR_PER_VEC, %rdx ++ jbe L(ret_max) ++# endif ++ ++ VPCMP $0, (VEC_SIZE * 2)(%rax), %VMM0, %k0 ++ KMOV %k0, %RCX ++ test %RCX, %RCX ++ jnz L(ret_vec_x3) ++ ++# ifdef USE_AS_STRNLEN ++ subq $CHAR_PER_VEC, %rdx ++ jbe L(ret_max) ++# endif ++ ++ VPCMP $0, (VEC_SIZE * 3)(%rax), %VMM0, %k0 ++ KMOV %k0, %RCX ++ test %RCX, %RCX ++ jnz L(ret_vec_x4) ++ ++# ifdef USE_AS_STRNLEN ++ subq $CHAR_PER_VEC, %rdx ++ jbe L(ret_max) ++ /* Save pointer before 4 x VEC_SIZE alignment. */ ++ movq %rax, %rcx ++# endif ++ ++ /* Align address to VEC_SIZE * 4 for loop. */ ++ andq $-(VEC_SIZE * 4), %rax ++ ++# ifdef USE_AS_STRNLEN ++ subq %rax, %rcx ++# ifdef USE_AS_WCSLEN ++ SHR $2, %RCX ++# endif ++ /* rcx contains number of [w]char will be recompared due to ++ alignment fixes. rdx must be incremented by rcx to offset ++ alignment adjustment. */ ++ addq %rcx, %rdx ++ /* Need jump as we don't want to add/subtract rdx for first ++ iteration of 4 x VEC_SIZE aligned loop. */ ++ jmp L(loop_entry) ++# endif ++ ++ .p2align 4,,11 ++L(loop): ++# ifdef USE_AS_STRNLEN ++ subq $(CHAR_PER_VEC * 4), %rdx ++ jbe L(ret_max) ++L(loop_entry): ++# endif ++ /* VPMINU and VPCMP combination provide better performance as ++ compared to alternative combinations. */ ++ VMOVA (VEC_SIZE * 4)(%rax), %VMM1 ++ VPMINU (VEC_SIZE * 5)(%rax), %VMM1, %VMM2 ++ VMOVA (VEC_SIZE * 6)(%rax), %VMM3 ++ VPMINU (VEC_SIZE * 7)(%rax), %VMM3, %VMM4 ++ ++ VPTESTN %VMM2, %VMM2, %k0 ++ VPTESTN %VMM4, %VMM4, %k1 ++ ++ subq $-(VEC_SIZE * 4), %rax ++ KORTEST %k0, %k1 ++ jz L(loop) ++ ++ VPTESTN %VMM1, %VMM1, %k2 ++ KMOV %k2, %RCX ++ test %RCX, %RCX ++ jnz L(ret_vec_x1) ++ ++ KMOV %k0, %RCX ++ /* At this point, if k0 is non zero, null char must be in the ++ second vector. */ ++ test %RCX, %RCX ++ jnz L(ret_vec_x2) ++ ++ VPTESTN %VMM3, %VMM3, %k3 ++ KMOV %k3, %RCX ++ test %RCX, %RCX ++ jnz L(ret_vec_x3) ++ /* At this point null [w]char must be in the fourth vector so no ++ need to check. */ ++ KMOV %k1, %RCX ++ ++ /* Fourth, third, second vector terminating are pretty much ++ same, implemented this way to avoid branching and reuse code ++ from pre loop exit condition. 
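(Concretely, each L(ret_vec_x*) label below folds its vector offset into the length before adding the bit index recovered from the mask: L(ret_vec_x4) accounts for three full vectors of offset, L(ret_vec_x3) for two, and L(ret_vec_x2) adds one vector before falling through to L(ret_vec_x1).)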
*/ ++L(ret_vec_x4): ++ bsf %RCX, %RCX ++ subq %rdi, %rax ++# ifdef USE_AS_WCSLEN ++ subq $-(VEC_SIZE * 3), %rax ++ shrq $2, %rax ++ addq %rcx, %rax ++# else ++ leaq (VEC_SIZE * 3)(%rcx, %rax), %rax ++# endif ++# ifdef USE_AS_STRNLEN ++ cmpq %rsi, %rax ++ cmovnb %rsi, %rax ++# endif ++ ret ++ ++L(ret_vec_x3): ++ bsf %RCX, %RCX ++ subq %rdi, %rax ++# ifdef USE_AS_WCSLEN ++ subq $-(VEC_SIZE * 2), %rax ++ shrq $2, %rax ++ addq %rcx, %rax ++# else ++ leaq (VEC_SIZE * 2)(%rcx, %rax), %rax ++# endif ++# ifdef USE_AS_STRNLEN ++ cmpq %rsi, %rax ++ cmovnb %rsi, %rax ++# endif ++ ret ++ ++L(ret_vec_x2): ++ subq $-VEC_SIZE, %rax ++L(ret_vec_x1): ++ bsf %RCX, %RCX ++ subq %rdi, %rax ++# ifdef USE_AS_WCSLEN ++ shrq $2, %rax ++# endif ++ addq %rcx, %rax ++# ifdef USE_AS_STRNLEN ++ cmpq %rsi, %rax ++ cmovnb %rsi, %rax ++# endif ++ ret ++ ++L(page_cross): ++ movl %eax, %ecx ++# ifdef USE_AS_WCSLEN ++ andl $(VEC_SIZE - 1), %ecx ++ sarl $2, %ecx ++# endif ++ /* ecx contains number of w[char] to be skipped as a result ++ of address alignment. */ ++ xorq %rdi, %rax ++ VPCMP $0, (PAGE_SIZE - VEC_SIZE)(%rax), %VMM0, %k0 ++ KMOV %k0, %RAX ++ /* Ignore number of character for alignment adjustment. */ ++ SHR %cl, %RAX ++ jz L(align_more) ++ ++ bsf %RAX, %RAX ++# ifdef USE_AS_STRNLEN ++ cmpq %rsi, %rax ++ cmovnb %rsi, %rax ++# endif ++ ret ++ ++END (STRLEN) ++#endif +diff --git a/sysdeps/x86_64/multiarch/strlen-evex512.S b/sysdeps/x86_64/multiarch/strlen-evex512.S +new file mode 100644 +index 0000000000000000..116f8981c8954e2e +--- /dev/null ++++ b/sysdeps/x86_64/multiarch/strlen-evex512.S +@@ -0,0 +1,7 @@ ++#ifndef STRLEN ++# define STRLEN __strlen_evex512 ++#endif ++ ++#define VEC_SIZE 64 ++ ++#include "strlen-evex-base.S" +diff --git a/sysdeps/x86_64/multiarch/strnlen-evex512.S b/sysdeps/x86_64/multiarch/strnlen-evex512.S +new file mode 100644 +index 0000000000000000..0b7f220214a7c33c +--- /dev/null ++++ b/sysdeps/x86_64/multiarch/strnlen-evex512.S +@@ -0,0 +1,4 @@ ++#define STRLEN __strnlen_evex512 ++#define USE_AS_STRNLEN 1 ++ ++#include "strlen-evex512.S" +diff --git a/sysdeps/x86_64/multiarch/wcslen-evex512.S b/sysdeps/x86_64/multiarch/wcslen-evex512.S +new file mode 100644 +index 0000000000000000..f59c372b78b4fb8c +--- /dev/null ++++ b/sysdeps/x86_64/multiarch/wcslen-evex512.S +@@ -0,0 +1,4 @@ ++#define STRLEN __wcslen_evex512 ++#define USE_AS_WCSLEN 1 ++ ++#include "strlen-evex512.S" +diff --git a/sysdeps/x86_64/multiarch/wcsnlen-evex512.S b/sysdeps/x86_64/multiarch/wcsnlen-evex512.S +new file mode 100644 +index 0000000000000000..73dcf2f210a85aac +--- /dev/null ++++ b/sysdeps/x86_64/multiarch/wcsnlen-evex512.S +@@ -0,0 +1,5 @@ ++#define STRLEN __wcsnlen_evex512 ++#define USE_AS_WCSLEN 1 ++#define USE_AS_STRNLEN 1 ++ ++#include "strlen-evex512.S" diff --git a/SOURCES/glibc-upstream-2.34-279.patch b/SOURCES/glibc-upstream-2.34-279.patch new file mode 100644 index 0000000..21c99ad --- /dev/null +++ b/SOURCES/glibc-upstream-2.34-279.patch @@ -0,0 +1,33 @@ +commit f6bc52f080e4a0195c707c01f54e2eae0ff89010 +Author: H.J. Lu +Date: Fri May 20 19:21:48 2022 -0700 + + x86-64: Ignore r_addend for R_X86_64_GLOB_DAT/R_X86_64_JUMP_SLOT + + According to x86-64 psABI, r_addend should be ignored for R_X86_64_GLOB_DAT + and R_X86_64_JUMP_SLOT. Since linkers always set their r_addends to 0, we + can ignore their r_addends. 
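A reduced sketch of the relocation dispatch this commit changes; it is illustrative only, the actual code is the dl-machine.h diff shown below:

    #include <elf.h>
    #include <stdint.h>

    static void
    apply_reloc (uint32_t r_type, uint64_t value, int64_t addend,
                 uint64_t *reloc_addr)
    {
      switch (r_type)
        {
        case R_X86_64_64:
          *reloc_addr = value + addend;   /* addend participates  */
          break;
        case R_X86_64_GLOB_DAT:
        case R_X86_64_JUMP_SLOT:
          *reloc_addr = value;            /* addend ignored per the psABI  */
          break;
        }
    }

Since linkers always emit a zero addend for GLOB_DAT and JUMP_SLOT, dropping the addition changes no observed result; it only stops pretending the addend matters.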
+ + Reviewed-by: Fangrui Song + (cherry picked from commit f8587a61892cbafd98ce599131bf4f103466f084) + +diff --git a/sysdeps/x86_64/dl-machine.h b/sysdeps/x86_64/dl-machine.h +index 94296719d4d9fb82..742682517179fab5 100644 +--- a/sysdeps/x86_64/dl-machine.h ++++ b/sysdeps/x86_64/dl-machine.h +@@ -347,11 +347,13 @@ and creates an unsatisfiable circular dependency.\n", + # endif + /* Set to symbol size plus addend. */ + value = sym->st_size; ++ *reloc_addr = value + reloc->r_addend; ++ break; + # endif +- /* Fall through. */ ++ + case R_X86_64_GLOB_DAT: + case R_X86_64_JUMP_SLOT: +- *reloc_addr = value + reloc->r_addend; ++ *reloc_addr = value; + break; + + # ifndef RESOLVE_CONFLICT_FIND_MAP diff --git a/SOURCES/glibc-upstream-2.34-280.patch b/SOURCES/glibc-upstream-2.34-280.patch new file mode 100644 index 0000000..95a58fe --- /dev/null +++ b/SOURCES/glibc-upstream-2.34-280.patch @@ -0,0 +1,356 @@ +commit 82a707aeb74f23bb1783af0f9e93790a2038ff7e +Author: Raghuveer Devulapalli +Date: Mon Jun 6 12:17:43 2022 -0700 + + x86_64: Add strstr function with 512-bit EVEX + + Adding a 512-bit EVEX version of strstr. The algorithm works as follows: + + (1) We spend a few cycles at the begining to peek into the needle. We + locate an edge in the needle (first occurance of 2 consequent distinct + characters) and also store the first 64-bytes into a zmm register. + + (2) We search for the edge in the haystack by looking into one cache + line of the haystack at a time. This avoids having to read past a page + boundary which can cause a seg fault. + + (3) If an edge is found in the haystack we first compare the first + 64-bytes of the needle (already stored in a zmm register) before we + proceed with a full string compare performed byte by byte. + + Benchmarking results: (old = strstr_sse2_unaligned, new = strstr_avx512) + + Geometric mean of all benchmarks: new / old = 0.66 + + Difficult skiptable(0) : new / old = 0.02 + Difficult skiptable(1) : new / old = 0.01 + Difficult 2-way : new / old = 0.25 + Difficult testing first 2 : new / old = 1.26 + Difficult skiptable(0) : new / old = 0.05 + Difficult skiptable(1) : new / old = 0.06 + Difficult 2-way : new / old = 0.26 + Difficult testing first 2 : new / old = 1.05 + Difficult skiptable(0) : new / old = 0.42 + Difficult skiptable(1) : new / old = 0.24 + Difficult 2-way : new / old = 0.21 + Difficult testing first 2 : new / old = 1.04 + Reviewed-by: H.J. Lu + + (cherry picked from commit 5082a287d5e9a1f9cb98b7c982a708a3684f1d5c) + + x86: Remove __mmask intrinsics in strstr-avx512.c + + The intrinsics are not available before GCC7 and using standard + operators generates code of equivalent or better quality. 
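The three removals listed next reduce to ordinary integer operators because __mmask64 is simply a 64-bit integer type; for instance, a hedged example mirroring the k0/k1 edge test used later in this file:

    #include <stdint.h>

    /* Operator form of:
       _cvtmask64_u64 (_kand_mask64 (k0, _kshiftri_mask64 (k1, 1)))  */
    static inline uint64_t
    edge_candidates (uint64_t k0, uint64_t k1)
    {
      return k0 & (k1 >> 1);
    }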
+ + Removed: + _cvtmask64_u64 + _kshiftri_mask64 + _kand_mask64 + + Geometric Mean of 5 Runs of Full Benchmark Suite New / Old: 0.958 + + (cherry picked from commit f2698954ff9c2f9626d4bcb5a30eb5729714e0b0) + +diff --git a/sysdeps/x86_64/multiarch/Makefile b/sysdeps/x86_64/multiarch/Makefile +index 4d4ad2a3686b5bc3..0e39e63ef6be6a86 100644 +--- a/sysdeps/x86_64/multiarch/Makefile ++++ b/sysdeps/x86_64/multiarch/Makefile +@@ -126,6 +126,7 @@ sysdep_routines += \ + strrchr-sse2 \ + strspn-c \ + strspn-sse2 \ ++ strstr-avx512 \ + strstr-sse2-unaligned \ + varshift \ + # sysdep_routines +@@ -133,6 +134,7 @@ CFLAGS-varshift.c += -msse4 + CFLAGS-strcspn-c.c += -msse4 + CFLAGS-strpbrk-c.c += -msse4 + CFLAGS-strspn-c.c += -msse4 ++CFLAGS-strstr-avx512.c += -mavx512f -mavx512vl -mavx512dq -mavx512bw -mbmi -mbmi2 -O3 + endif + + ifeq ($(subdir),wcsmbs) +diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c +index 6b75a7106e174bce..043821278fdb6d8f 100644 +--- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c ++++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c +@@ -633,6 +633,12 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, + + /* Support sysdeps/x86_64/multiarch/strstr.c. */ + IFUNC_IMPL (i, name, strstr, ++ IFUNC_IMPL_ADD (array, i, strstr, ++ (CPU_FEATURE_USABLE (AVX512VL) ++ && CPU_FEATURE_USABLE (AVX512BW) ++ && CPU_FEATURE_USABLE (AVX512DQ) ++ && CPU_FEATURE_USABLE (BMI2)), ++ __strstr_avx512) + IFUNC_IMPL_ADD (array, i, strstr, 1, __strstr_sse2_unaligned) + IFUNC_IMPL_ADD (array, i, strstr, 1, __strstr_sse2)) + +diff --git a/sysdeps/x86_64/multiarch/strstr-avx512.c b/sysdeps/x86_64/multiarch/strstr-avx512.c +new file mode 100644 +index 0000000000000000..e44c1a05dc0007e5 +--- /dev/null ++++ b/sysdeps/x86_64/multiarch/strstr-avx512.c +@@ -0,0 +1,218 @@ ++/* strstr optimized with 512-bit AVX-512 instructions ++ Copyright (C) 2022 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++#include ++#include ++#include ++ ++#define FULL_MMASK64 0xffffffffffffffff ++#define ONE_64BIT 0x1ull ++#define ZMM_SIZE_IN_BYTES 64 ++#define PAGESIZE 4096 ++ ++#define cvtmask64_u64(...) (uint64_t) (__VA_ARGS__) ++#define kshiftri_mask64(x, y) ((x) >> (y)) ++#define kand_mask64(x, y) ((x) & (y)) ++ ++/* ++ Returns the index of the first edge within the needle, returns 0 if no edge ++ is found. 
Example: 'ab' is the first edge in 'aaaaaaaaaabaarddg' ++ */ ++static inline size_t ++find_edge_in_needle (const char *ned) ++{ ++ size_t ind = 0; ++ while (ned[ind + 1] != '\0') ++ { ++ if (ned[ind] != ned[ind + 1]) ++ return ind; ++ else ++ ind = ind + 1; ++ } ++ return 0; ++} ++ ++/* ++ Compare needle with haystack byte by byte at specified location ++ */ ++static inline bool ++verify_string_match (const char *hay, const size_t hay_index, const char *ned, ++ size_t ind) ++{ ++ while (ned[ind] != '\0') ++ { ++ if (ned[ind] != hay[hay_index + ind]) ++ return false; ++ ind = ind + 1; ++ } ++ return true; ++} ++ ++/* ++ Compare needle with haystack at specified location. The first 64 bytes are ++ compared using a ZMM register. ++ */ ++static inline bool ++verify_string_match_avx512 (const char *hay, const size_t hay_index, ++ const char *ned, const __mmask64 ned_mask, ++ const __m512i ned_zmm) ++{ ++ /* check first 64 bytes using zmm and then scalar */ ++ __m512i hay_zmm = _mm512_loadu_si512 (hay + hay_index); // safe to do so ++ __mmask64 match = _mm512_mask_cmpneq_epi8_mask (ned_mask, hay_zmm, ned_zmm); ++ if (match != 0x0) // failed the first few chars ++ return false; ++ else if (ned_mask == FULL_MMASK64) ++ return verify_string_match (hay, hay_index, ned, ZMM_SIZE_IN_BYTES); ++ return true; ++} ++ ++char * ++__strstr_avx512 (const char *haystack, const char *ned) ++{ ++ char first = ned[0]; ++ if (first == '\0') ++ return (char *)haystack; ++ if (ned[1] == '\0') ++ return (char *)strchr (haystack, ned[0]); ++ ++ size_t edge = find_edge_in_needle (ned); ++ ++ /* ensure haystack is as long as the pos of edge in needle */ ++ for (int ii = 0; ii < edge; ++ii) ++ { ++ if (haystack[ii] == '\0') ++ return NULL; ++ } ++ ++ /* ++ Load 64 bytes of the needle and save it to a zmm register ++ Read one cache line at a time to avoid loading across a page boundary ++ */ ++ __mmask64 ned_load_mask = _bzhi_u64 ( ++ FULL_MMASK64, 64 - ((uintptr_t) (ned) & 63)); ++ __m512i ned_zmm = _mm512_maskz_loadu_epi8 (ned_load_mask, ned); ++ __mmask64 ned_nullmask ++ = _mm512_mask_testn_epi8_mask (ned_load_mask, ned_zmm, ned_zmm); ++ ++ if (__glibc_unlikely (ned_nullmask == 0x0)) ++ { ++ ned_zmm = _mm512_loadu_si512 (ned); ++ ned_nullmask = _mm512_testn_epi8_mask (ned_zmm, ned_zmm); ++ ned_load_mask = ned_nullmask ^ (ned_nullmask - ONE_64BIT); ++ if (ned_nullmask != 0x0) ++ ned_load_mask = ned_load_mask >> 1; ++ } ++ else ++ { ++ ned_load_mask = ned_nullmask ^ (ned_nullmask - ONE_64BIT); ++ ned_load_mask = ned_load_mask >> 1; ++ } ++ const __m512i ned0 = _mm512_set1_epi8 (ned[edge]); ++ const __m512i ned1 = _mm512_set1_epi8 (ned[edge + 1]); ++ ++ /* ++ Read the bytes of haystack in the current cache line ++ */ ++ size_t hay_index = edge; ++ __mmask64 loadmask = _bzhi_u64 ( ++ FULL_MMASK64, 64 - ((uintptr_t) (haystack + hay_index) & 63)); ++ /* First load is a partial cache line */ ++ __m512i hay0 = _mm512_maskz_loadu_epi8 (loadmask, haystack + hay_index); ++ /* Search for NULL and compare only till null char */ ++ uint64_t nullmask ++ = cvtmask64_u64 (_mm512_mask_testn_epi8_mask (loadmask, hay0, hay0)); ++ uint64_t cmpmask = nullmask ^ (nullmask - ONE_64BIT); ++ cmpmask = cmpmask & cvtmask64_u64 (loadmask); ++ /* Search for the 2 charaters of needle */ ++ __mmask64 k0 = _mm512_cmpeq_epi8_mask (hay0, ned0); ++ __mmask64 k1 = _mm512_cmpeq_epi8_mask (hay0, ned1); ++ k1 = kshiftri_mask64 (k1, 1); ++ /* k2 masks tell us if both chars from needle match */ ++ uint64_t k2 = cvtmask64_u64 (kand_mask64 (k0, k1)) & cmpmask; 
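++ /* Each set bit i in k2 marks a position where hay[i] == ned[edge] and
++ hay[i + 1] == ned[edge + 1]; k1 was shifted right by one so the
++ second edge character lines up with the first, making every set bit
++ a candidate occurrence of the needle's edge within the valid range. */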
++ /* For every match, search for the entire needle for a full match */ ++ while (k2) ++ { ++ uint64_t bitcount = _tzcnt_u64 (k2); ++ k2 = _blsr_u64 (k2); ++ size_t match_pos = hay_index + bitcount - edge; ++ if (((uintptr_t) (haystack + match_pos) & (PAGESIZE - 1)) ++ < PAGESIZE - 1 - ZMM_SIZE_IN_BYTES) ++ { ++ /* ++ * Use vector compare as long as you are not crossing a page ++ */ ++ if (verify_string_match_avx512 (haystack, match_pos, ned, ++ ned_load_mask, ned_zmm)) ++ return (char *)haystack + match_pos; ++ } ++ else ++ { ++ if (verify_string_match (haystack, match_pos, ned, 0)) ++ return (char *)haystack + match_pos; ++ } ++ } ++ /* We haven't checked for potential match at the last char yet */ ++ haystack = (const char *)(((uintptr_t) (haystack + hay_index) | 63)); ++ hay_index = 0; ++ ++ /* ++ Loop over one cache line at a time to prevent reading over page ++ boundary ++ */ ++ __m512i hay1; ++ while (nullmask == 0) ++ { ++ hay0 = _mm512_loadu_si512 (haystack + hay_index); ++ hay1 = _mm512_load_si512 (haystack + hay_index ++ + 1); // Always 64 byte aligned ++ nullmask = cvtmask64_u64 (_mm512_testn_epi8_mask (hay1, hay1)); ++ /* Compare only till null char */ ++ cmpmask = nullmask ^ (nullmask - ONE_64BIT); ++ k0 = _mm512_cmpeq_epi8_mask (hay0, ned0); ++ k1 = _mm512_cmpeq_epi8_mask (hay1, ned1); ++ /* k2 masks tell us if both chars from needle match */ ++ k2 = cvtmask64_u64 (kand_mask64 (k0, k1)) & cmpmask; ++ /* For every match, compare full strings for potential match */ ++ while (k2) ++ { ++ uint64_t bitcount = _tzcnt_u64 (k2); ++ k2 = _blsr_u64 (k2); ++ size_t match_pos = hay_index + bitcount - edge; ++ if (((uintptr_t) (haystack + match_pos) & (PAGESIZE - 1)) ++ < PAGESIZE - 1 - ZMM_SIZE_IN_BYTES) ++ { ++ /* ++ * Use vector compare as long as you are not crossing a page ++ */ ++ if (verify_string_match_avx512 (haystack, match_pos, ned, ++ ned_load_mask, ned_zmm)) ++ return (char *)haystack + match_pos; ++ } ++ else ++ { ++ /* Compare byte by byte */ ++ if (verify_string_match (haystack, match_pos, ned, 0)) ++ return (char *)haystack + match_pos; ++ } ++ } ++ hay_index += ZMM_SIZE_IN_BYTES; ++ } ++ return NULL; ++} +diff --git a/sysdeps/x86_64/multiarch/strstr.c b/sysdeps/x86_64/multiarch/strstr.c +index 848601bde7583ca3..9474d6234e9b62d3 100644 +--- a/sysdeps/x86_64/multiarch/strstr.c ++++ b/sysdeps/x86_64/multiarch/strstr.c +@@ -35,16 +35,32 @@ + + extern __typeof (__redirect_strstr) __strstr_sse2_unaligned attribute_hidden; + extern __typeof (__redirect_strstr) __strstr_sse2 attribute_hidden; ++extern __typeof (__redirect_strstr) __strstr_avx512 attribute_hidden; + + #include "init-arch.h" + + /* Avoid DWARF definition DIE on ifunc symbol so that GDB can handle + ifunc symbol properly. */ + extern __typeof (__redirect_strstr) __libc_strstr; +-libc_ifunc (__libc_strstr, +- HAS_ARCH_FEATURE (Fast_Unaligned_Load) +- ? 
__strstr_sse2_unaligned +- : __strstr_sse2) + ++static inline void * ++IFUNC_SELECTOR (void) ++{ ++ const struct cpu_features *cpu_features = __get_cpu_features (); ++ ++ if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_AVX512) ++ && CPU_FEATURE_USABLE_P (cpu_features, AVX512VL) ++ && CPU_FEATURE_USABLE_P (cpu_features, AVX512BW) ++ && CPU_FEATURE_USABLE_P (cpu_features, AVX512DQ) ++ && CPU_FEATURE_USABLE_P (cpu_features, BMI2)) ++ return __strstr_avx512; ++ ++ if (CPU_FEATURES_ARCH_P (cpu_features, Fast_Unaligned_Load)) ++ return __strstr_sse2_unaligned; ++ ++ return __strstr_sse2; ++} ++ ++libc_ifunc_redirected (__redirect_strstr, __libc_strstr, IFUNC_SELECTOR ()); + #undef strstr + strong_alias (__libc_strstr, strstr) diff --git a/SOURCES/glibc-upstream-2.34-281.patch b/SOURCES/glibc-upstream-2.34-281.patch new file mode 100644 index 0000000..3a32378 --- /dev/null +++ b/SOURCES/glibc-upstream-2.34-281.patch @@ -0,0 +1,385 @@ +commit 70be93d1c58916d289a5e6e7c7d9b989707a9e41 +Author: Noah Goldstein +Date: Mon Jun 6 21:11:27 2022 -0700 + + x86: Create header for VEC classes in x86 strings library + + This patch does not touch any existing code and is only meant to be a + tool for future patches so that simple source files can more easily be + maintained to target multiple VEC classes. + + There is no difference in the objdump of libc.so before and after this + patch. + Reviewed-by: H.J. Lu + + (cherry picked from commit 8a780a6b910023e71f3173f37f0793834c047554) + +diff --git a/sysdeps/x86_64/multiarch/avx-rtm-vecs.h b/sysdeps/x86_64/multiarch/avx-rtm-vecs.h +new file mode 100644 +index 0000000000000000..3f531dd47fceefe9 +--- /dev/null ++++ b/sysdeps/x86_64/multiarch/avx-rtm-vecs.h +@@ -0,0 +1,34 @@ ++/* Common config for AVX-RTM VECs ++ All versions must be listed in ifunc-impl-list.c. ++ Copyright (C) 2022 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#ifndef _AVX_RTM_VECS_H ++#define _AVX_RTM_VECS_H 1 ++ ++#define ZERO_UPPER_VEC_REGISTERS_RETURN \ ++ ZERO_UPPER_VEC_REGISTERS_RETURN_XTEST ++ ++#define VZEROUPPER_RETURN jmp L(return_vzeroupper) ++ ++#define USE_WITH_RTM 1 ++#include "avx-vecs.h" ++ ++#undef SECTION ++#define SECTION(p) p##.avx.rtm ++ ++#endif +diff --git a/sysdeps/x86_64/multiarch/avx-vecs.h b/sysdeps/x86_64/multiarch/avx-vecs.h +new file mode 100644 +index 0000000000000000..89680f5db827c332 +--- /dev/null ++++ b/sysdeps/x86_64/multiarch/avx-vecs.h +@@ -0,0 +1,47 @@ ++/* Common config for AVX VECs ++ All versions must be listed in ifunc-impl-list.c. ++ Copyright (C) 2022 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. 
++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#ifndef _AVX_VECS_H ++#define _AVX_VECS_H 1 ++ ++#ifdef VEC_SIZE ++# error "Multiple VEC configs included!" ++#endif ++ ++#define VEC_SIZE 32 ++#include "vec-macros.h" ++ ++#define USE_WITH_AVX 1 ++#define SECTION(p) p##.avx ++ ++/* 4-byte mov instructions with AVX2. */ ++#define MOV_SIZE 4 ++/* 1 (ret) + 3 (vzeroupper). */ ++#define RET_SIZE 4 ++#define VZEROUPPER vzeroupper ++ ++#define VMOVU vmovdqu ++#define VMOVA vmovdqa ++#define VMOVNT vmovntdq ++ ++/* Often need to access xmm portion. */ ++#define VEC_xmm VEC_any_xmm ++#define VEC VEC_any_ymm ++ ++#endif +diff --git a/sysdeps/x86_64/multiarch/evex-vecs-common.h b/sysdeps/x86_64/multiarch/evex-vecs-common.h +new file mode 100644 +index 0000000000000000..99806ebcd7bde53d +--- /dev/null ++++ b/sysdeps/x86_64/multiarch/evex-vecs-common.h +@@ -0,0 +1,39 @@ ++/* Common config for EVEX256 and EVEX512 VECs ++ All versions must be listed in ifunc-impl-list.c. ++ Copyright (C) 2022 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#ifndef _EVEX_VECS_COMMON_H ++#define _EVEX_VECS_COMMON_H 1 ++ ++#include "vec-macros.h" ++ ++/* 6-byte mov instructions with EVEX. */ ++#define MOV_SIZE 6 ++/* No vzeroupper needed. */ ++#define RET_SIZE 1 ++#define VZEROUPPER ++ ++#define VMOVU vmovdqu64 ++#define VMOVA vmovdqa64 ++#define VMOVNT vmovntdq ++ ++#define VEC_xmm VEC_hi_xmm ++#define VEC_ymm VEC_hi_ymm ++#define VEC_zmm VEC_hi_zmm ++ ++#endif +diff --git a/sysdeps/x86_64/multiarch/evex256-vecs.h b/sysdeps/x86_64/multiarch/evex256-vecs.h +new file mode 100644 +index 0000000000000000..222ba46dc74cfcbd +--- /dev/null ++++ b/sysdeps/x86_64/multiarch/evex256-vecs.h +@@ -0,0 +1,35 @@ ++/* Common config for EVEX256 VECs ++ All versions must be listed in ifunc-impl-list.c. ++ Copyright (C) 2022 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. 
++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#ifndef _EVEX256_VECS_H ++#define _EVEX256_VECS_H 1 ++ ++#ifdef VEC_SIZE ++# error "Multiple VEC configs included!" ++#endif ++ ++#define VEC_SIZE 32 ++#include "evex-vecs-common.h" ++ ++#define USE_WITH_EVEX256 1 ++#define SECTION(p) p##.evex ++ ++#define VEC VEC_ymm ++ ++#endif +diff --git a/sysdeps/x86_64/multiarch/evex512-vecs.h b/sysdeps/x86_64/multiarch/evex512-vecs.h +new file mode 100644 +index 0000000000000000..d1784d5368d8cebe +--- /dev/null ++++ b/sysdeps/x86_64/multiarch/evex512-vecs.h +@@ -0,0 +1,35 @@ ++/* Common config for EVEX512 VECs ++ All versions must be listed in ifunc-impl-list.c. ++ Copyright (C) 2022 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#ifndef _EVEX512_VECS_H ++#define _EVEX512_VECS_H 1 ++ ++#ifdef VEC_SIZE ++# error "Multiple VEC configs included!" ++#endif ++ ++#define VEC_SIZE 64 ++#include "evex-vecs-common.h" ++ ++#define USE_WITH_EVEX512 1 ++#define SECTION(p) p##.evex512 ++ ++#define VEC VEC_zmm ++ ++#endif +diff --git a/sysdeps/x86_64/multiarch/sse2-vecs.h b/sysdeps/x86_64/multiarch/sse2-vecs.h +new file mode 100644 +index 0000000000000000..2b77a59d56ff2660 +--- /dev/null ++++ b/sysdeps/x86_64/multiarch/sse2-vecs.h +@@ -0,0 +1,47 @@ ++/* Common config for SSE2 VECs ++ All versions must be listed in ifunc-impl-list.c. ++ Copyright (C) 2022 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#ifndef _SSE2_VECS_H ++#define _SSE2_VECS_H 1 ++ ++#ifdef VEC_SIZE ++# error "Multiple VEC configs included!" ++#endif ++ ++#define VEC_SIZE 16 ++#include "vec-macros.h" ++ ++#define USE_WITH_SSE2 1 ++#define SECTION(p) p ++ ++/* 3-byte mov instructions with SSE2. */ ++#define MOV_SIZE 3 ++/* No vzeroupper needed. 
*/ ++#define RET_SIZE 1 ++#define VZEROUPPER ++ ++#define VMOVU movups ++#define VMOVA movaps ++#define VMOVNT movntdq ++ ++#define VEC_xmm VEC_any_xmm ++#define VEC VEC_any_xmm ++ ++ ++#endif +diff --git a/sysdeps/x86_64/multiarch/vec-macros.h b/sysdeps/x86_64/multiarch/vec-macros.h +new file mode 100644 +index 0000000000000000..9f3ffecede9feb26 +--- /dev/null ++++ b/sysdeps/x86_64/multiarch/vec-macros.h +@@ -0,0 +1,90 @@ ++/* Macro helpers for VEC_{type}({vec_num}) ++ All versions must be listed in ifunc-impl-list.c. ++ Copyright (C) 2022 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#ifndef _VEC_MACROS_H ++#define _VEC_MACROS_H 1 ++ ++#ifndef VEC_SIZE ++# error "Never include this file directly. Always include a vector config." ++#endif ++ ++/* Defines so we can use SSE2 / AVX2 / EVEX / EVEX512 encoding with same ++ VEC(N) values. */ ++#define VEC_hi_xmm0 xmm16 ++#define VEC_hi_xmm1 xmm17 ++#define VEC_hi_xmm2 xmm18 ++#define VEC_hi_xmm3 xmm19 ++#define VEC_hi_xmm4 xmm20 ++#define VEC_hi_xmm5 xmm21 ++#define VEC_hi_xmm6 xmm22 ++#define VEC_hi_xmm7 xmm23 ++#define VEC_hi_xmm8 xmm24 ++#define VEC_hi_xmm9 xmm25 ++#define VEC_hi_xmm10 xmm26 ++#define VEC_hi_xmm11 xmm27 ++#define VEC_hi_xmm12 xmm28 ++#define VEC_hi_xmm13 xmm29 ++#define VEC_hi_xmm14 xmm30 ++#define VEC_hi_xmm15 xmm31 ++ ++#define VEC_hi_ymm0 ymm16 ++#define VEC_hi_ymm1 ymm17 ++#define VEC_hi_ymm2 ymm18 ++#define VEC_hi_ymm3 ymm19 ++#define VEC_hi_ymm4 ymm20 ++#define VEC_hi_ymm5 ymm21 ++#define VEC_hi_ymm6 ymm22 ++#define VEC_hi_ymm7 ymm23 ++#define VEC_hi_ymm8 ymm24 ++#define VEC_hi_ymm9 ymm25 ++#define VEC_hi_ymm10 ymm26 ++#define VEC_hi_ymm11 ymm27 ++#define VEC_hi_ymm12 ymm28 ++#define VEC_hi_ymm13 ymm29 ++#define VEC_hi_ymm14 ymm30 ++#define VEC_hi_ymm15 ymm31 ++ ++#define VEC_hi_zmm0 zmm16 ++#define VEC_hi_zmm1 zmm17 ++#define VEC_hi_zmm2 zmm18 ++#define VEC_hi_zmm3 zmm19 ++#define VEC_hi_zmm4 zmm20 ++#define VEC_hi_zmm5 zmm21 ++#define VEC_hi_zmm6 zmm22 ++#define VEC_hi_zmm7 zmm23 ++#define VEC_hi_zmm8 zmm24 ++#define VEC_hi_zmm9 zmm25 ++#define VEC_hi_zmm10 zmm26 ++#define VEC_hi_zmm11 zmm27 ++#define VEC_hi_zmm12 zmm28 ++#define VEC_hi_zmm13 zmm29 ++#define VEC_hi_zmm14 zmm30 ++#define VEC_hi_zmm15 zmm31 ++ ++#define PRIMITIVE_VEC(vec, num) vec##num ++ ++#define VEC_any_xmm(i) PRIMITIVE_VEC(xmm, i) ++#define VEC_any_ymm(i) PRIMITIVE_VEC(ymm, i) ++#define VEC_any_zmm(i) PRIMITIVE_VEC(zmm, i) ++ ++#define VEC_hi_xmm(i) PRIMITIVE_VEC(VEC_hi_xmm, i) ++#define VEC_hi_ymm(i) PRIMITIVE_VEC(VEC_hi_ymm, i) ++#define VEC_hi_zmm(i) PRIMITIVE_VEC(VEC_hi_zmm, i) ++ ++#endif diff --git a/SOURCES/glibc-upstream-2.34-282.patch b/SOURCES/glibc-upstream-2.34-282.patch new file mode 100644 index 0000000..b1c9c0d --- /dev/null +++ b/SOURCES/glibc-upstream-2.34-282.patch @@ -0,0 +1,90 @@ +commit e805606193e1a39956ca5ef73cb44a8796730686 +Author: Noah 
Goldstein +Date: Mon Jun 6 21:11:28 2022 -0700 + + x86: Add COND_VZEROUPPER that can replace vzeroupper if no `ret` + + The RTM vzeroupper mitigation has no way of replacing inline + vzeroupper not before a return. + + This can be useful when hoisting a vzeroupper to save code size + for example: + + ``` + L(foo): + cmpl %eax, %edx + jz L(bar) + tzcntl %eax, %eax + addq %rdi, %rax + VZEROUPPER_RETURN + + L(bar): + xorl %eax, %eax + VZEROUPPER_RETURN + ``` + + Can become: + + ``` + L(foo): + COND_VZEROUPPER + cmpl %eax, %edx + jz L(bar) + tzcntl %eax, %eax + addq %rdi, %rax + ret + + L(bar): + xorl %eax, %eax + ret + ``` + + This code does not change any existing functionality. + + There is no difference in the objdump of libc.so before and after this + patch. + Reviewed-by: H.J. Lu + + (cherry picked from commit dd5c483b2598f411428df4d8864c15c4b8a3cd68) + +diff --git a/sysdeps/x86_64/multiarch/avx-rtm-vecs.h b/sysdeps/x86_64/multiarch/avx-rtm-vecs.h +index 3f531dd47fceefe9..6ca9f5e6bae7ba72 100644 +--- a/sysdeps/x86_64/multiarch/avx-rtm-vecs.h ++++ b/sysdeps/x86_64/multiarch/avx-rtm-vecs.h +@@ -20,6 +20,7 @@ + #ifndef _AVX_RTM_VECS_H + #define _AVX_RTM_VECS_H 1 + ++#define COND_VZEROUPPER COND_VZEROUPPER_XTEST + #define ZERO_UPPER_VEC_REGISTERS_RETURN \ + ZERO_UPPER_VEC_REGISTERS_RETURN_XTEST + +diff --git a/sysdeps/x86_64/sysdep.h b/sysdeps/x86_64/sysdep.h +index 7bebdeb21095eda0..93e44be22e2275f1 100644 +--- a/sysdeps/x86_64/sysdep.h ++++ b/sysdeps/x86_64/sysdep.h +@@ -106,6 +106,24 @@ lose: \ + vzeroupper; \ + ret + ++/* Can be used to replace vzeroupper that is not directly before a ++ return. This is useful when hoisting a vzeroupper from multiple ++ return paths to decrease the total number of vzerouppers and code ++ size. */ ++#define COND_VZEROUPPER_XTEST \ ++ xtest; \ ++ jz 1f; \ ++ vzeroall; \ ++ jmp 2f; \ ++1: \ ++ vzeroupper; \ ++2: ++ ++/* In RTM define this as COND_VZEROUPPER_XTEST. */ ++#ifndef COND_VZEROUPPER ++# define COND_VZEROUPPER vzeroupper ++#endif ++ + /* Zero upper vector registers and return. */ + #ifndef ZERO_UPPER_VEC_REGISTERS_RETURN + # define ZERO_UPPER_VEC_REGISTERS_RETURN \ diff --git a/SOURCES/glibc-upstream-2.34-283.patch b/SOURCES/glibc-upstream-2.34-283.patch new file mode 100644 index 0000000..7745fef --- /dev/null +++ b/SOURCES/glibc-upstream-2.34-283.patch @@ -0,0 +1,696 @@ +commit 4901009dad8b3ab141ac6e0caebe99e03a67f5eb +Author: Noah Goldstein +Date: Mon Jun 6 21:11:30 2022 -0700 + + x86: Optimize memrchr-sse2.S + + The new code: + 1. prioritizes smaller lengths more. + 2. optimizes target placement more carefully. + 3. reuses logic more. + 4. fixes up various inefficiencies in the logic. + + The total code size saving is: 394 bytes + Geometric Mean of all benchmarks New / Old: 0.874 + + Regressions: + 1. The page cross case is now colder, especially re-entry from the + page cross case if a match is not found in the first VEC + (roughly 50%). My general opinion with this patch is this is + acceptable given the "coldness" of this case (less than 4%) and + generally performance improvement in the other far more common + cases. + + 2. There are some regressions 5-15% for medium/large user-arg + lengths that have a match in the first VEC. This is because the + logic was rewritten to optimize finds in the first VEC if the + user-arg length is shorter (where we see roughly 20-50% + performance improvements). It is not always the case this is a + regression. 
My intuition is some frontend quirk is partially + explaining the data although I haven't been able to find the + root cause. + + Full xcheck passes on x86_64. + Reviewed-by: H.J. Lu + + (cherry picked from commit 731feee3869550e93177e604604c1765d81de571) + +diff --git a/sysdeps/x86_64/memrchr.S b/sysdeps/x86_64/memrchr.S +index cc2001167d77c83c..c2a5902bf9385c67 100644 +--- a/sysdeps/x86_64/memrchr.S ++++ b/sysdeps/x86_64/memrchr.S +@@ -19,362 +19,333 @@ + . */ + + #include ++#define VEC_SIZE 16 ++#define PAGE_SIZE 4096 + + .text +-ENTRY (__memrchr) +- movd %esi, %xmm1 +- +- sub $16, %RDX_LP +- jbe L(length_less16) +- +- punpcklbw %xmm1, %xmm1 +- punpcklbw %xmm1, %xmm1 +- +- add %RDX_LP, %RDI_LP +- pshufd $0, %xmm1, %xmm1 +- +- movdqu (%rdi), %xmm0 +- pcmpeqb %xmm1, %xmm0 +- +-/* Check if there is a match. */ +- pmovmskb %xmm0, %eax +- test %eax, %eax +- jnz L(matches0) +- +- sub $64, %rdi +- mov %edi, %ecx +- and $15, %ecx +- jz L(loop_prolog) +- +- add $16, %rdi +- add $16, %rdx +- and $-16, %rdi +- sub %rcx, %rdx +- +- .p2align 4 +-L(loop_prolog): +- sub $64, %rdx +- jbe L(exit_loop) +- +- movdqa 48(%rdi), %xmm0 +- pcmpeqb %xmm1, %xmm0 +- pmovmskb %xmm0, %eax +- test %eax, %eax +- jnz L(matches48) +- +- movdqa 32(%rdi), %xmm2 +- pcmpeqb %xmm1, %xmm2 +- pmovmskb %xmm2, %eax +- test %eax, %eax +- jnz L(matches32) +- +- movdqa 16(%rdi), %xmm3 +- pcmpeqb %xmm1, %xmm3 +- pmovmskb %xmm3, %eax +- test %eax, %eax +- jnz L(matches16) +- +- movdqa (%rdi), %xmm4 +- pcmpeqb %xmm1, %xmm4 +- pmovmskb %xmm4, %eax +- test %eax, %eax +- jnz L(matches0) +- +- sub $64, %rdi +- sub $64, %rdx +- jbe L(exit_loop) +- +- movdqa 48(%rdi), %xmm0 +- pcmpeqb %xmm1, %xmm0 +- pmovmskb %xmm0, %eax +- test %eax, %eax +- jnz L(matches48) +- +- movdqa 32(%rdi), %xmm2 +- pcmpeqb %xmm1, %xmm2 +- pmovmskb %xmm2, %eax +- test %eax, %eax +- jnz L(matches32) +- +- movdqa 16(%rdi), %xmm3 +- pcmpeqb %xmm1, %xmm3 +- pmovmskb %xmm3, %eax +- test %eax, %eax +- jnz L(matches16) +- +- movdqa (%rdi), %xmm3 +- pcmpeqb %xmm1, %xmm3 +- pmovmskb %xmm3, %eax +- test %eax, %eax +- jnz L(matches0) +- +- mov %edi, %ecx +- and $63, %ecx +- jz L(align64_loop) +- +- add $64, %rdi +- add $64, %rdx +- and $-64, %rdi +- sub %rcx, %rdx +- +- .p2align 4 +-L(align64_loop): +- sub $64, %rdi +- sub $64, %rdx +- jbe L(exit_loop) +- +- movdqa (%rdi), %xmm0 +- movdqa 16(%rdi), %xmm2 +- movdqa 32(%rdi), %xmm3 +- movdqa 48(%rdi), %xmm4 +- +- pcmpeqb %xmm1, %xmm0 +- pcmpeqb %xmm1, %xmm2 +- pcmpeqb %xmm1, %xmm3 +- pcmpeqb %xmm1, %xmm4 +- +- pmaxub %xmm3, %xmm0 +- pmaxub %xmm4, %xmm2 +- pmaxub %xmm0, %xmm2 +- pmovmskb %xmm2, %eax +- +- test %eax, %eax +- jz L(align64_loop) +- +- pmovmskb %xmm4, %eax +- test %eax, %eax +- jnz L(matches48) +- +- pmovmskb %xmm3, %eax +- test %eax, %eax +- jnz L(matches32) +- +- movdqa 16(%rdi), %xmm2 +- +- pcmpeqb %xmm1, %xmm2 +- pcmpeqb (%rdi), %xmm1 +- +- pmovmskb %xmm2, %eax +- test %eax, %eax +- jnz L(matches16) +- +- pmovmskb %xmm1, %eax +- bsr %eax, %eax +- +- add %rdi, %rax ++ENTRY_P2ALIGN(__memrchr, 6) ++#ifdef __ILP32__ ++ /* Clear upper bits. */ ++ mov %RDX_LP, %RDX_LP ++#endif ++ movd %esi, %xmm0 ++ ++ /* Get end pointer. */ ++ leaq (%rdx, %rdi), %rcx ++ ++ punpcklbw %xmm0, %xmm0 ++ punpcklwd %xmm0, %xmm0 ++ pshufd $0, %xmm0, %xmm0 ++ ++ /* Check if we can load 1x VEC without cross a page. */ ++ testl $(PAGE_SIZE - VEC_SIZE), %ecx ++ jz L(page_cross) ++ ++ /* NB: This load happens regardless of whether rdx (len) is zero. 
Since ++ it doesn't cross a page and the standard gurantees any pointer have ++ at least one-valid byte this load must be safe. For the entire ++ history of the x86 memrchr implementation this has been possible so ++ no code "should" be relying on a zero-length check before this load. ++ The zero-length check is moved to the page cross case because it is ++ 1) pretty cold and including it pushes the hot case len <= VEC_SIZE ++ into 2-cache lines. */ ++ movups -(VEC_SIZE)(%rcx), %xmm1 ++ pcmpeqb %xmm0, %xmm1 ++ pmovmskb %xmm1, %eax ++ ++ subq $VEC_SIZE, %rdx ++ ja L(more_1x_vec) ++L(ret_vec_x0_test): ++ /* Zero-flag set if eax (src) is zero. Destination unchanged if src is ++ zero. */ ++ bsrl %eax, %eax ++ jz L(ret_0) ++ /* Check if the CHAR match is in bounds. Need to truly zero `eax` here ++ if out of bounds. */ ++ addl %edx, %eax ++ jl L(zero_0) ++ /* Since we subtracted VEC_SIZE from rdx earlier we can just add to base ++ ptr. */ ++ addq %rdi, %rax ++L(ret_0): + ret + +- .p2align 4 +-L(exit_loop): +- add $64, %edx +- cmp $32, %edx +- jbe L(exit_loop_32) +- +- movdqa 48(%rdi), %xmm0 +- pcmpeqb %xmm1, %xmm0 +- pmovmskb %xmm0, %eax +- test %eax, %eax +- jnz L(matches48) +- +- movdqa 32(%rdi), %xmm2 +- pcmpeqb %xmm1, %xmm2 +- pmovmskb %xmm2, %eax +- test %eax, %eax +- jnz L(matches32) +- +- movdqa 16(%rdi), %xmm3 +- pcmpeqb %xmm1, %xmm3 +- pmovmskb %xmm3, %eax +- test %eax, %eax +- jnz L(matches16_1) +- cmp $48, %edx +- jbe L(return_null) +- +- pcmpeqb (%rdi), %xmm1 +- pmovmskb %xmm1, %eax +- test %eax, %eax +- jnz L(matches0_1) +- xor %eax, %eax ++ .p2align 4,, 5 ++L(ret_vec_x0): ++ bsrl %eax, %eax ++ leaq -(VEC_SIZE)(%rcx, %rax), %rax + ret + +- .p2align 4 +-L(exit_loop_32): +- movdqa 48(%rdi), %xmm0 +- pcmpeqb %xmm1, %xmm0 +- pmovmskb %xmm0, %eax +- test %eax, %eax +- jnz L(matches48_1) +- cmp $16, %edx +- jbe L(return_null) +- +- pcmpeqb 32(%rdi), %xmm1 +- pmovmskb %xmm1, %eax +- test %eax, %eax +- jnz L(matches32_1) +- xor %eax, %eax ++ .p2align 4,, 2 ++L(zero_0): ++ xorl %eax, %eax + ret + +- .p2align 4 +-L(matches0): +- bsr %eax, %eax +- add %rdi, %rax +- ret +- +- .p2align 4 +-L(matches16): +- bsr %eax, %eax +- lea 16(%rax, %rdi), %rax +- ret + +- .p2align 4 +-L(matches32): +- bsr %eax, %eax +- lea 32(%rax, %rdi), %rax ++ .p2align 4,, 8 ++L(more_1x_vec): ++ testl %eax, %eax ++ jnz L(ret_vec_x0) ++ ++ /* Align rcx (pointer to string). */ ++ decq %rcx ++ andq $-VEC_SIZE, %rcx ++ ++ movq %rcx, %rdx ++ /* NB: We could consistenyl save 1-byte in this pattern with `movaps ++ %xmm0, %xmm1; pcmpeq IMM8(r), %xmm1; ...`. The reason against it is ++ it adds more frontend uops (even if the moves can be eliminated) and ++ some percentage of the time actual backend uops. */ ++ movaps -(VEC_SIZE)(%rcx), %xmm1 ++ pcmpeqb %xmm0, %xmm1 ++ subq %rdi, %rdx ++ pmovmskb %xmm1, %eax ++ ++ cmpq $(VEC_SIZE * 2), %rdx ++ ja L(more_2x_vec) ++L(last_2x_vec): ++ subl $VEC_SIZE, %edx ++ jbe L(ret_vec_x0_test) ++ ++ testl %eax, %eax ++ jnz L(ret_vec_x0) ++ ++ movaps -(VEC_SIZE * 2)(%rcx), %xmm1 ++ pcmpeqb %xmm0, %xmm1 ++ pmovmskb %xmm1, %eax ++ ++ subl $VEC_SIZE, %edx ++ bsrl %eax, %eax ++ jz L(ret_1) ++ addl %edx, %eax ++ jl L(zero_0) ++ addq %rdi, %rax ++L(ret_1): + ret + +- .p2align 4 +-L(matches48): +- bsr %eax, %eax +- lea 48(%rax, %rdi), %rax ++ /* Don't align. Otherwise lose 2-byte encoding in jump to L(page_cross) ++ causes the hot pause (length <= VEC_SIZE) to span multiple cache ++ lines. Naturally aligned % 16 to 8-bytes. */ ++L(page_cross): ++ /* Zero length check. 
*/ ++ testq %rdx, %rdx ++ jz L(zero_0) ++ ++ leaq -1(%rcx), %r8 ++ andq $-(VEC_SIZE), %r8 ++ ++ movaps (%r8), %xmm1 ++ pcmpeqb %xmm0, %xmm1 ++ pmovmskb %xmm1, %esi ++ /* Shift out negative alignment (because we are starting from endptr and ++ working backwards). */ ++ negl %ecx ++ /* 32-bit shift but VEC_SIZE=16 so need to mask the shift count ++ explicitly. */ ++ andl $(VEC_SIZE - 1), %ecx ++ shl %cl, %esi ++ movzwl %si, %eax ++ leaq (%rdi, %rdx), %rcx ++ cmpq %rdi, %r8 ++ ja L(more_1x_vec) ++ subl $VEC_SIZE, %edx ++ bsrl %eax, %eax ++ jz L(ret_2) ++ addl %edx, %eax ++ jl L(zero_1) ++ addq %rdi, %rax ++L(ret_2): + ret + +- .p2align 4 +-L(matches0_1): +- bsr %eax, %eax +- sub $64, %rdx +- add %rax, %rdx +- jl L(return_null) +- add %rdi, %rax ++ /* Fits in aliging bytes. */ ++L(zero_1): ++ xorl %eax, %eax + ret + +- .p2align 4 +-L(matches16_1): +- bsr %eax, %eax +- sub $48, %rdx +- add %rax, %rdx +- jl L(return_null) +- lea 16(%rdi, %rax), %rax ++ .p2align 4,, 5 ++L(ret_vec_x1): ++ bsrl %eax, %eax ++ leaq -(VEC_SIZE * 2)(%rcx, %rax), %rax + ret + +- .p2align 4 +-L(matches32_1): +- bsr %eax, %eax +- sub $32, %rdx +- add %rax, %rdx +- jl L(return_null) +- lea 32(%rdi, %rax), %rax +- ret ++ .p2align 4,, 8 ++L(more_2x_vec): ++ testl %eax, %eax ++ jnz L(ret_vec_x0) + +- .p2align 4 +-L(matches48_1): +- bsr %eax, %eax +- sub $16, %rdx +- add %rax, %rdx +- jl L(return_null) +- lea 48(%rdi, %rax), %rax +- ret ++ movaps -(VEC_SIZE * 2)(%rcx), %xmm1 ++ pcmpeqb %xmm0, %xmm1 ++ pmovmskb %xmm1, %eax ++ testl %eax, %eax ++ jnz L(ret_vec_x1) + +- .p2align 4 +-L(return_null): +- xor %eax, %eax +- ret + +- .p2align 4 +-L(length_less16_offset0): +- test %edx, %edx +- jz L(return_null) ++ movaps -(VEC_SIZE * 3)(%rcx), %xmm1 ++ pcmpeqb %xmm0, %xmm1 ++ pmovmskb %xmm1, %eax + +- mov %dl, %cl +- pcmpeqb (%rdi), %xmm1 ++ subq $(VEC_SIZE * 4), %rdx ++ ja L(more_4x_vec) + +- mov $1, %edx +- sal %cl, %edx +- sub $1, %edx ++ addl $(VEC_SIZE), %edx ++ jle L(ret_vec_x2_test) + +- pmovmskb %xmm1, %eax ++L(last_vec): ++ testl %eax, %eax ++ jnz L(ret_vec_x2) + +- and %edx, %eax +- test %eax, %eax +- jz L(return_null) ++ movaps -(VEC_SIZE * 4)(%rcx), %xmm1 ++ pcmpeqb %xmm0, %xmm1 ++ pmovmskb %xmm1, %eax + +- bsr %eax, %eax +- add %rdi, %rax ++ subl $(VEC_SIZE), %edx ++ bsrl %eax, %eax ++ jz L(ret_3) ++ addl %edx, %eax ++ jl L(zero_2) ++ addq %rdi, %rax ++L(ret_3): + ret + +- .p2align 4 +-L(length_less16): +- punpcklbw %xmm1, %xmm1 +- punpcklbw %xmm1, %xmm1 +- +- add $16, %edx +- +- pshufd $0, %xmm1, %xmm1 +- +- mov %edi, %ecx +- and $15, %ecx +- jz L(length_less16_offset0) +- +- mov %cl, %dh +- mov %ecx, %esi +- add %dl, %dh +- and $-16, %rdi +- +- sub $16, %dh +- ja L(length_less16_part2) +- +- pcmpeqb (%rdi), %xmm1 +- pmovmskb %xmm1, %eax +- +- sar %cl, %eax +- mov %dl, %cl +- +- mov $1, %edx +- sal %cl, %edx +- sub $1, %edx +- +- and %edx, %eax +- test %eax, %eax +- jz L(return_null) +- +- bsr %eax, %eax +- add %rdi, %rax +- add %rsi, %rax ++ .p2align 4,, 6 ++L(ret_vec_x2_test): ++ bsrl %eax, %eax ++ jz L(zero_2) ++ addl %edx, %eax ++ jl L(zero_2) ++ addq %rdi, %rax + ret + +- .p2align 4 +-L(length_less16_part2): +- movdqa 16(%rdi), %xmm2 +- pcmpeqb %xmm1, %xmm2 +- pmovmskb %xmm2, %eax +- +- mov %dh, %cl +- mov $1, %edx +- sal %cl, %edx +- sub $1, %edx +- +- and %edx, %eax ++L(zero_2): ++ xorl %eax, %eax ++ ret + +- test %eax, %eax +- jnz L(length_less16_part2_return) + +- pcmpeqb (%rdi), %xmm1 +- pmovmskb %xmm1, %eax ++ .p2align 4,, 5 ++L(ret_vec_x2): ++ bsrl %eax, %eax ++ leaq -(VEC_SIZE * 3)(%rcx, %rax), %rax ++ ret + 
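++	/* Illustrative note (added in review; not from the upstream commit):
++	   in the L(ret_vec_xN) return stubs here, %rcx is the VEC_SIZE-aligned
++	   upper bound of the current window, so a match in the vector N + 1
++	   VECs back sits at %rcx - (N + 1) * VEC_SIZE + (bsrl bit index).
++	   E.g. with VEC_SIZE == 16, %rcx == 0x1030 and bit 5 as the highest
++	   set bit in %eax, L(ret_vec_x2) returns 0x1030 - 48 + 5 == 0x1005.  */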
+- mov %esi, %ecx +- sar %cl, %eax +- test %eax, %eax +- jz L(return_null) ++ .p2align 4,, 5 ++L(ret_vec_x3): ++ bsrl %eax, %eax ++ leaq -(VEC_SIZE * 4)(%rcx, %rax), %rax ++ ret + +- bsr %eax, %eax +- add %rdi, %rax +- add %rsi, %rax ++ .p2align 4,, 8 ++L(more_4x_vec): ++ testl %eax, %eax ++ jnz L(ret_vec_x2) ++ ++ movaps -(VEC_SIZE * 4)(%rcx), %xmm1 ++ pcmpeqb %xmm0, %xmm1 ++ pmovmskb %xmm1, %eax ++ ++ testl %eax, %eax ++ jnz L(ret_vec_x3) ++ ++ addq $-(VEC_SIZE * 4), %rcx ++ cmpq $(VEC_SIZE * 4), %rdx ++ jbe L(last_4x_vec) ++ ++ /* Offset everything by 4x VEC_SIZE here to save a few bytes at the end ++ keeping the code from spilling to the next cache line. */ ++ addq $(VEC_SIZE * 4 - 1), %rcx ++ andq $-(VEC_SIZE * 4), %rcx ++ leaq (VEC_SIZE * 4)(%rdi), %rdx ++ andq $-(VEC_SIZE * 4), %rdx ++ ++ .p2align 4,, 11 ++L(loop_4x_vec): ++ movaps (VEC_SIZE * -1)(%rcx), %xmm1 ++ movaps (VEC_SIZE * -2)(%rcx), %xmm2 ++ movaps (VEC_SIZE * -3)(%rcx), %xmm3 ++ movaps (VEC_SIZE * -4)(%rcx), %xmm4 ++ pcmpeqb %xmm0, %xmm1 ++ pcmpeqb %xmm0, %xmm2 ++ pcmpeqb %xmm0, %xmm3 ++ pcmpeqb %xmm0, %xmm4 ++ ++ por %xmm1, %xmm2 ++ por %xmm3, %xmm4 ++ por %xmm2, %xmm4 ++ ++ pmovmskb %xmm4, %esi ++ testl %esi, %esi ++ jnz L(loop_end) ++ ++ addq $-(VEC_SIZE * 4), %rcx ++ cmpq %rdx, %rcx ++ jne L(loop_4x_vec) ++ ++ subl %edi, %edx ++ ++ /* Ends up being 1-byte nop. */ ++ .p2align 4,, 2 ++L(last_4x_vec): ++ movaps -(VEC_SIZE)(%rcx), %xmm1 ++ pcmpeqb %xmm0, %xmm1 ++ pmovmskb %xmm1, %eax ++ ++ cmpl $(VEC_SIZE * 2), %edx ++ jbe L(last_2x_vec) ++ ++ testl %eax, %eax ++ jnz L(ret_vec_x0) ++ ++ ++ movaps -(VEC_SIZE * 2)(%rcx), %xmm1 ++ pcmpeqb %xmm0, %xmm1 ++ pmovmskb %xmm1, %eax ++ ++ testl %eax, %eax ++ jnz L(ret_vec_end) ++ ++ movaps -(VEC_SIZE * 3)(%rcx), %xmm1 ++ pcmpeqb %xmm0, %xmm1 ++ pmovmskb %xmm1, %eax ++ ++ subl $(VEC_SIZE * 3), %edx ++ ja L(last_vec) ++ bsrl %eax, %eax ++ jz L(ret_4) ++ addl %edx, %eax ++ jl L(zero_3) ++ addq %rdi, %rax ++L(ret_4): + ret + +- .p2align 4 +-L(length_less16_part2_return): +- bsr %eax, %eax +- lea 16(%rax, %rdi), %rax ++ /* Ends up being 1-byte nop. */ ++ .p2align 4,, 3 ++L(loop_end): ++ pmovmskb %xmm1, %eax ++ sall $16, %eax ++ jnz L(ret_vec_end) ++ ++ pmovmskb %xmm2, %eax ++ testl %eax, %eax ++ jnz L(ret_vec_end) ++ ++ pmovmskb %xmm3, %eax ++ /* Combine last 2 VEC matches. If ecx (VEC3) is zero (no CHAR in VEC3) ++ then it won't affect the result in esi (VEC4). If ecx is non-zero ++ then CHAR in VEC3 and bsrq will use that position. */ ++ sall $16, %eax ++ orl %esi, %eax ++ bsrl %eax, %eax ++ leaq -(VEC_SIZE * 4)(%rcx, %rax), %rax + ret + +-END (__memrchr) ++L(ret_vec_end): ++ bsrl %eax, %eax ++ leaq (VEC_SIZE * -2)(%rax, %rcx), %rax ++ ret ++ /* Use in L(last_4x_vec). In the same cache line. This is just a spare ++ aligning bytes. */ ++L(zero_3): ++ xorl %eax, %eax ++ ret ++ /* 2-bytes from next cache line. */ ++END(__memrchr) + weak_alias (__memrchr, memrchr) diff --git a/SOURCES/glibc-upstream-2.34-284.patch b/SOURCES/glibc-upstream-2.34-284.patch new file mode 100644 index 0000000..846f807 --- /dev/null +++ b/SOURCES/glibc-upstream-2.34-284.patch @@ -0,0 +1,624 @@ +commit 83a986e9fbc301e6056dbc9d9ec6888621b60f67 +Author: Noah Goldstein +Date: Mon Jun 6 21:11:31 2022 -0700 + + x86: Optimize memrchr-evex.S + + The new code: + 1. prioritizes smaller user-arg lengths more. + 2. optimizes target placement more carefully + 3. reuses logic more + 4. fixes up various inefficiencies in the logic. 
The biggest + case here is the `lzcnt` logic for checking returns which + saves either a branch or multiple instructions. + + The total code size saving is: 263 bytes + Geometric Mean of all benchmarks New / Old: 0.755 + + Regressions: + There are some regressions. Particularly where the length (user arg + length) is large but the position of the match char is near the + beginning of the string (in first VEC). This case has roughly a + 20% regression. + + This is because the new logic gives the hot path for immediate matches + to shorter lengths (the more common input). This case has roughly + a 35% speedup. + + Full xcheck passes on x86_64. + Reviewed-by: H.J. Lu + + (cherry picked from commit b4209615a06b01c974f47b4998b00e4c7b1aa5d9) + +diff --git a/sysdeps/x86_64/multiarch/memrchr-evex.S b/sysdeps/x86_64/multiarch/memrchr-evex.S +index 16bf8e02b1e80c84..bddc89c3754894ed 100644 +--- a/sysdeps/x86_64/multiarch/memrchr-evex.S ++++ b/sysdeps/x86_64/multiarch/memrchr-evex.S +@@ -19,319 +19,316 @@ + #if IS_IN (libc) + + # include ++# include "evex256-vecs.h" ++# if VEC_SIZE != 32 ++# error "VEC_SIZE != 32 unimplemented" ++# endif ++ ++# ifndef MEMRCHR ++# define MEMRCHR __memrchr_evex ++# endif ++ ++# define PAGE_SIZE 4096 ++# define VECMATCH VEC(0) ++ ++ .section SECTION(.text), "ax", @progbits ++ENTRY_P2ALIGN(MEMRCHR, 6) ++# ifdef __ILP32__ ++ /* Clear upper bits. */ ++ and %RDX_LP, %RDX_LP ++# else ++ test %RDX_LP, %RDX_LP ++# endif ++ jz L(zero_0) ++ ++ /* Get end pointer. Minus one for two reasons. 1) It is necessary for a ++ correct page cross check and 2) it correctly sets up end ptr to be ++ subtract by lzcnt aligned. */ ++ leaq -1(%rdi, %rdx), %rax ++ vpbroadcastb %esi, %VECMATCH ++ ++ /* Check if we can load 1x VEC without cross a page. */ ++ testl $(PAGE_SIZE - VEC_SIZE), %eax ++ jz L(page_cross) ++ ++ /* Don't use rax for pointer here because EVEX has better encoding with ++ offset % VEC_SIZE == 0. */ ++ vpcmpb $0, -(VEC_SIZE)(%rdi, %rdx), %VECMATCH, %k0 ++ kmovd %k0, %ecx ++ ++ /* Fall through for rdx (len) <= VEC_SIZE (expect small sizes). */ ++ cmpq $VEC_SIZE, %rdx ++ ja L(more_1x_vec) ++L(ret_vec_x0_test): ++ ++ /* If ecx is zero (no matches) lzcnt will set it 32 (VEC_SIZE) which ++ will guarantee edx (len) is less than it. */ ++ lzcntl %ecx, %ecx ++ cmpl %ecx, %edx ++ jle L(zero_0) ++ subq %rcx, %rax ++ ret + +-# define VMOVA vmovdqa64 +- +-# define YMMMATCH ymm16 +- +-# define VEC_SIZE 32 +- +- .section .text.evex,"ax",@progbits +-ENTRY (__memrchr_evex) +- /* Broadcast CHAR to YMMMATCH. */ +- vpbroadcastb %esi, %YMMMATCH +- +- sub $VEC_SIZE, %RDX_LP +- jbe L(last_vec_or_less) +- +- add %RDX_LP, %RDI_LP +- +- /* Check the last VEC_SIZE bytes. */ +- vpcmpb $0, (%rdi), %YMMMATCH, %k1 +- kmovd %k1, %eax +- testl %eax, %eax +- jnz L(last_vec_x0) +- +- subq $(VEC_SIZE * 4), %rdi +- movl %edi, %ecx +- andl $(VEC_SIZE - 1), %ecx +- jz L(aligned_more) +- +- /* Align data for aligned loads in the loop. */ +- addq $VEC_SIZE, %rdi +- addq $VEC_SIZE, %rdx +- andq $-VEC_SIZE, %rdi +- subq %rcx, %rdx +- +- .p2align 4 +-L(aligned_more): +- subq $(VEC_SIZE * 4), %rdx +- jbe L(last_4x_vec_or_less) +- +- /* Check the last 4 * VEC_SIZE. Only one VEC_SIZE at a time +- since data is only aligned to VEC_SIZE. 
*/ +- vpcmpb $0, (VEC_SIZE * 3)(%rdi), %YMMMATCH, %k1 +- kmovd %k1, %eax +- testl %eax, %eax +- jnz L(last_vec_x3) +- +- vpcmpb $0, (VEC_SIZE * 2)(%rdi), %YMMMATCH, %k2 +- kmovd %k2, %eax +- testl %eax, %eax +- jnz L(last_vec_x2) +- +- vpcmpb $0, VEC_SIZE(%rdi), %YMMMATCH, %k3 +- kmovd %k3, %eax +- testl %eax, %eax +- jnz L(last_vec_x1) +- +- vpcmpb $0, (%rdi), %YMMMATCH, %k4 +- kmovd %k4, %eax +- testl %eax, %eax +- jnz L(last_vec_x0) +- +- /* Align data to 4 * VEC_SIZE for loop with fewer branches. +- There are some overlaps with above if data isn't aligned +- to 4 * VEC_SIZE. */ +- movl %edi, %ecx +- andl $(VEC_SIZE * 4 - 1), %ecx +- jz L(loop_4x_vec) +- +- addq $(VEC_SIZE * 4), %rdi +- addq $(VEC_SIZE * 4), %rdx +- andq $-(VEC_SIZE * 4), %rdi +- subq %rcx, %rdx ++ /* Fits in aligning bytes of first cache line. */ ++L(zero_0): ++ xorl %eax, %eax ++ ret + +- .p2align 4 +-L(loop_4x_vec): +- /* Compare 4 * VEC at a time forward. */ +- subq $(VEC_SIZE * 4), %rdi +- subq $(VEC_SIZE * 4), %rdx +- jbe L(last_4x_vec_or_less) +- +- vpcmpb $0, (%rdi), %YMMMATCH, %k1 +- vpcmpb $0, VEC_SIZE(%rdi), %YMMMATCH, %k2 +- kord %k1, %k2, %k5 +- vpcmpb $0, (VEC_SIZE * 2)(%rdi), %YMMMATCH, %k3 +- vpcmpb $0, (VEC_SIZE * 3)(%rdi), %YMMMATCH, %k4 +- +- kord %k3, %k4, %k6 +- kortestd %k5, %k6 +- jz L(loop_4x_vec) +- +- /* There is a match. */ +- kmovd %k4, %eax +- testl %eax, %eax +- jnz L(last_vec_x3) +- +- kmovd %k3, %eax +- testl %eax, %eax +- jnz L(last_vec_x2) +- +- kmovd %k2, %eax +- testl %eax, %eax +- jnz L(last_vec_x1) +- +- kmovd %k1, %eax +- bsrl %eax, %eax +- addq %rdi, %rax ++ .p2align 4,, 9 ++L(ret_vec_x0_dec): ++ decq %rax ++L(ret_vec_x0): ++ lzcntl %ecx, %ecx ++ subq %rcx, %rax + ret + +- .p2align 4 +-L(last_4x_vec_or_less): +- addl $(VEC_SIZE * 4), %edx +- cmpl $(VEC_SIZE * 2), %edx +- jbe L(last_2x_vec) ++ .p2align 4,, 10 ++L(more_1x_vec): ++ testl %ecx, %ecx ++ jnz L(ret_vec_x0) + +- vpcmpb $0, (VEC_SIZE * 3)(%rdi), %YMMMATCH, %k1 +- kmovd %k1, %eax +- testl %eax, %eax +- jnz L(last_vec_x3) ++ /* Align rax (pointer to string). */ ++ andq $-VEC_SIZE, %rax + +- vpcmpb $0, (VEC_SIZE * 2)(%rdi), %YMMMATCH, %k2 +- kmovd %k2, %eax +- testl %eax, %eax +- jnz L(last_vec_x2) ++ /* Recompute length after aligning. */ ++ movq %rax, %rdx + +- vpcmpb $0, VEC_SIZE(%rdi), %YMMMATCH, %k3 +- kmovd %k3, %eax +- testl %eax, %eax +- jnz L(last_vec_x1_check) +- cmpl $(VEC_SIZE * 3), %edx +- jbe L(zero) ++ /* Need no matter what. */ ++ vpcmpb $0, -(VEC_SIZE)(%rax), %VECMATCH, %k0 ++ kmovd %k0, %ecx + +- vpcmpb $0, (%rdi), %YMMMATCH, %k4 +- kmovd %k4, %eax +- testl %eax, %eax +- jz L(zero) +- bsrl %eax, %eax +- subq $(VEC_SIZE * 4), %rdx +- addq %rax, %rdx +- jl L(zero) +- addq %rdi, %rax +- ret ++ subq %rdi, %rdx + +- .p2align 4 ++ cmpq $(VEC_SIZE * 2), %rdx ++ ja L(more_2x_vec) + L(last_2x_vec): +- vpcmpb $0, (VEC_SIZE * 3)(%rdi), %YMMMATCH, %k1 +- kmovd %k1, %eax +- testl %eax, %eax +- jnz L(last_vec_x3_check) ++ ++ /* Must dec rax because L(ret_vec_x0_test) expects it. */ ++ decq %rax + cmpl $VEC_SIZE, %edx +- jbe L(zero) +- +- vpcmpb $0, (VEC_SIZE * 2)(%rdi), %YMMMATCH, %k1 +- kmovd %k1, %eax +- testl %eax, %eax +- jz L(zero) +- bsrl %eax, %eax +- subq $(VEC_SIZE * 2), %rdx +- addq %rax, %rdx +- jl L(zero) +- addl $(VEC_SIZE * 2), %eax +- addq %rdi, %rax ++ jbe L(ret_vec_x0_test) ++ ++ testl %ecx, %ecx ++ jnz L(ret_vec_x0) ++ ++ /* Don't use rax for pointer here because EVEX has better encoding with ++ offset % VEC_SIZE == 0. 
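++	   (Presumably the disp8*N compressed displacement: a VEC_SIZE-aligned
++	   offset such as -(VEC_SIZE * 2) encodes as the single scaled byte
++	   -2 rather than a full 4-byte displacement.)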
*/ ++ vpcmpb $0, -(VEC_SIZE * 2)(%rdi, %rdx), %VECMATCH, %k0 ++ kmovd %k0, %ecx ++ /* NB: 64-bit lzcnt. This will naturally add 32 to position. */ ++ lzcntq %rcx, %rcx ++ cmpl %ecx, %edx ++ jle L(zero_0) ++ subq %rcx, %rax + ret + +- .p2align 4 +-L(last_vec_x0): +- bsrl %eax, %eax +- addq %rdi, %rax ++ /* Inexpensive place to put this regarding code size / target alignments ++ / ICache NLP. Necessary for 2-byte encoding of jump to page cross ++ case which in turn is necessary for hot path (len <= VEC_SIZE) to fit ++ in first cache line. */ ++L(page_cross): ++ movq %rax, %rsi ++ andq $-VEC_SIZE, %rsi ++ vpcmpb $0, (%rsi), %VECMATCH, %k0 ++ kmovd %k0, %r8d ++ /* Shift out negative alignment (because we are starting from endptr and ++ working backwards). */ ++ movl %eax, %ecx ++ /* notl because eax already has endptr - 1. (-x = ~(x - 1)). */ ++ notl %ecx ++ shlxl %ecx, %r8d, %ecx ++ cmpq %rdi, %rsi ++ ja L(more_1x_vec) ++ lzcntl %ecx, %ecx ++ cmpl %ecx, %edx ++ jle L(zero_1) ++ subq %rcx, %rax + ret + +- .p2align 4 +-L(last_vec_x1): +- bsrl %eax, %eax +- addl $VEC_SIZE, %eax +- addq %rdi, %rax ++ /* Continue creating zero labels that fit in aligning bytes and get ++ 2-byte encoding / are in the same cache line as condition. */ ++L(zero_1): ++ xorl %eax, %eax + ret + +- .p2align 4 +-L(last_vec_x2): +- bsrl %eax, %eax +- addl $(VEC_SIZE * 2), %eax +- addq %rdi, %rax ++ .p2align 4,, 8 ++L(ret_vec_x1): ++ /* This will naturally add 32 to position. */ ++ bsrl %ecx, %ecx ++ leaq -(VEC_SIZE * 2)(%rcx, %rax), %rax + ret + +- .p2align 4 +-L(last_vec_x3): +- bsrl %eax, %eax +- addl $(VEC_SIZE * 3), %eax +- addq %rdi, %rax +- ret ++ .p2align 4,, 8 ++L(more_2x_vec): ++ testl %ecx, %ecx ++ jnz L(ret_vec_x0_dec) + +- .p2align 4 +-L(last_vec_x1_check): +- bsrl %eax, %eax +- subq $(VEC_SIZE * 3), %rdx +- addq %rax, %rdx +- jl L(zero) +- addl $VEC_SIZE, %eax +- addq %rdi, %rax +- ret ++ vpcmpb $0, -(VEC_SIZE * 2)(%rax), %VECMATCH, %k0 ++ kmovd %k0, %ecx ++ testl %ecx, %ecx ++ jnz L(ret_vec_x1) + +- .p2align 4 +-L(last_vec_x3_check): +- bsrl %eax, %eax +- subq $VEC_SIZE, %rdx +- addq %rax, %rdx +- jl L(zero) +- addl $(VEC_SIZE * 3), %eax +- addq %rdi, %rax +- ret ++ /* Need no matter what. */ ++ vpcmpb $0, -(VEC_SIZE * 3)(%rax), %VECMATCH, %k0 ++ kmovd %k0, %ecx + +- .p2align 4 +-L(zero): +- xorl %eax, %eax ++ subq $(VEC_SIZE * 4), %rdx ++ ja L(more_4x_vec) ++ ++ cmpl $(VEC_SIZE * -1), %edx ++ jle L(ret_vec_x2_test) ++L(last_vec): ++ testl %ecx, %ecx ++ jnz L(ret_vec_x2) ++ ++ ++ /* Need no matter what. */ ++ vpcmpb $0, -(VEC_SIZE * 4)(%rax), %VECMATCH, %k0 ++ kmovd %k0, %ecx ++ lzcntl %ecx, %ecx ++ subq $(VEC_SIZE * 3 + 1), %rax ++ subq %rcx, %rax ++ cmpq %rax, %rdi ++ ja L(zero_1) + ret + +- .p2align 4 +-L(last_vec_or_less_aligned): +- movl %edx, %ecx +- +- vpcmpb $0, (%rdi), %YMMMATCH, %k1 +- +- movl $1, %edx +- /* Support rdx << 32. */ +- salq %cl, %rdx +- subq $1, %rdx +- +- kmovd %k1, %eax +- +- /* Remove the trailing bytes. */ +- andl %edx, %eax +- testl %eax, %eax +- jz L(zero) +- +- bsrl %eax, %eax +- addq %rdi, %rax ++ .p2align 4,, 8 ++L(ret_vec_x2_test): ++ lzcntl %ecx, %ecx ++ subq $(VEC_SIZE * 2 + 1), %rax ++ subq %rcx, %rax ++ cmpq %rax, %rdi ++ ja L(zero_1) + ret + +- .p2align 4 +-L(last_vec_or_less): +- addl $VEC_SIZE, %edx +- +- /* Check for zero length. 
*/ +- testl %edx, %edx +- jz L(zero) +- +- movl %edi, %ecx +- andl $(VEC_SIZE - 1), %ecx +- jz L(last_vec_or_less_aligned) +- +- movl %ecx, %esi +- movl %ecx, %r8d +- addl %edx, %esi +- andq $-VEC_SIZE, %rdi ++ .p2align 4,, 8 ++L(ret_vec_x2): ++ bsrl %ecx, %ecx ++ leaq -(VEC_SIZE * 3)(%rcx, %rax), %rax ++ ret + +- subl $VEC_SIZE, %esi +- ja L(last_vec_2x_aligned) ++ .p2align 4,, 8 ++L(ret_vec_x3): ++ bsrl %ecx, %ecx ++ leaq -(VEC_SIZE * 4)(%rcx, %rax), %rax ++ ret + +- /* Check the last VEC. */ +- vpcmpb $0, (%rdi), %YMMMATCH, %k1 +- kmovd %k1, %eax ++ .p2align 4,, 8 ++L(more_4x_vec): ++ testl %ecx, %ecx ++ jnz L(ret_vec_x2) + +- /* Remove the leading and trailing bytes. */ +- sarl %cl, %eax +- movl %edx, %ecx ++ vpcmpb $0, -(VEC_SIZE * 4)(%rax), %VECMATCH, %k0 ++ kmovd %k0, %ecx + +- movl $1, %edx +- sall %cl, %edx +- subl $1, %edx ++ testl %ecx, %ecx ++ jnz L(ret_vec_x3) + +- andl %edx, %eax +- testl %eax, %eax +- jz L(zero) ++ /* Check if near end before re-aligning (otherwise might do an ++ unnecessary loop iteration). */ ++ addq $-(VEC_SIZE * 4), %rax ++ cmpq $(VEC_SIZE * 4), %rdx ++ jbe L(last_4x_vec) + +- bsrl %eax, %eax +- addq %rdi, %rax +- addq %r8, %rax +- ret ++ decq %rax ++ andq $-(VEC_SIZE * 4), %rax ++ movq %rdi, %rdx ++ /* Get endptr for loop in rdx. NB: Can't just do while rax > rdi because ++ lengths that overflow can be valid and break the comparison. */ ++ andq $-(VEC_SIZE * 4), %rdx + + .p2align 4 +-L(last_vec_2x_aligned): +- movl %esi, %ecx +- +- /* Check the last VEC. */ +- vpcmpb $0, VEC_SIZE(%rdi), %YMMMATCH, %k1 ++L(loop_4x_vec): ++ /* Store 1 were not-equals and 0 where equals in k1 (used to mask later ++ on). */ ++ vpcmpb $4, (VEC_SIZE * 3)(%rax), %VECMATCH, %k1 ++ ++ /* VEC(2/3) will have zero-byte where we found a CHAR. */ ++ vpxorq (VEC_SIZE * 2)(%rax), %VECMATCH, %VEC(2) ++ vpxorq (VEC_SIZE * 1)(%rax), %VECMATCH, %VEC(3) ++ vpcmpb $0, (VEC_SIZE * 0)(%rax), %VECMATCH, %k4 ++ ++ /* Combine VEC(2/3) with min and maskz with k1 (k1 has zero bit where ++ CHAR is found and VEC(2/3) have zero-byte where CHAR is found. */ ++ vpminub %VEC(2), %VEC(3), %VEC(3){%k1}{z} ++ vptestnmb %VEC(3), %VEC(3), %k2 ++ ++ /* Any 1s and we found CHAR. */ ++ kortestd %k2, %k4 ++ jnz L(loop_end) ++ ++ addq $-(VEC_SIZE * 4), %rax ++ cmpq %rdx, %rax ++ jne L(loop_4x_vec) ++ ++ /* Need to re-adjust rdx / rax for L(last_4x_vec). */ ++ subq $-(VEC_SIZE * 4), %rdx ++ movq %rdx, %rax ++ subl %edi, %edx ++L(last_4x_vec): ++ ++ /* Used no matter what. */ ++ vpcmpb $0, (VEC_SIZE * -1)(%rax), %VECMATCH, %k0 ++ kmovd %k0, %ecx + +- movl $1, %edx +- sall %cl, %edx +- subl $1, %edx ++ cmpl $(VEC_SIZE * 2), %edx ++ jbe L(last_2x_vec) + +- kmovd %k1, %eax ++ testl %ecx, %ecx ++ jnz L(ret_vec_x0_dec) + +- /* Remove the trailing bytes. */ +- andl %edx, %eax + +- testl %eax, %eax +- jnz L(last_vec_x1) ++ vpcmpb $0, (VEC_SIZE * -2)(%rax), %VECMATCH, %k0 ++ kmovd %k0, %ecx + +- /* Check the second last VEC. */ +- vpcmpb $0, (%rdi), %YMMMATCH, %k1 ++ testl %ecx, %ecx ++ jnz L(ret_vec_x1) + +- movl %r8d, %ecx ++ /* Used no matter what. */ ++ vpcmpb $0, (VEC_SIZE * -3)(%rax), %VECMATCH, %k0 ++ kmovd %k0, %ecx + +- kmovd %k1, %eax ++ cmpl $(VEC_SIZE * 3), %edx ++ ja L(last_vec) + +- /* Remove the leading bytes. Must use unsigned right shift for +- bsrl below. 
*/ +- shrl %cl, %eax +- testl %eax, %eax +- jz L(zero) ++ lzcntl %ecx, %ecx ++ subq $(VEC_SIZE * 2 + 1), %rax ++ subq %rcx, %rax ++ cmpq %rax, %rdi ++ jbe L(ret_1) ++ xorl %eax, %eax ++L(ret_1): ++ ret + +- bsrl %eax, %eax +- addq %rdi, %rax +- addq %r8, %rax ++ .p2align 4,, 6 ++L(loop_end): ++ kmovd %k1, %ecx ++ notl %ecx ++ testl %ecx, %ecx ++ jnz L(ret_vec_x0_end) ++ ++ vptestnmb %VEC(2), %VEC(2), %k0 ++ kmovd %k0, %ecx ++ testl %ecx, %ecx ++ jnz L(ret_vec_x1_end) ++ ++ kmovd %k2, %ecx ++ kmovd %k4, %esi ++ /* Combine last 2 VEC matches. If ecx (VEC3) is zero (no CHAR in VEC3) ++ then it won't affect the result in esi (VEC4). If ecx is non-zero ++ then CHAR in VEC3 and bsrq will use that position. */ ++ salq $32, %rcx ++ orq %rsi, %rcx ++ bsrq %rcx, %rcx ++ addq %rcx, %rax ++ ret ++ .p2align 4,, 4 ++L(ret_vec_x0_end): ++ addq $(VEC_SIZE), %rax ++L(ret_vec_x1_end): ++ bsrl %ecx, %ecx ++ leaq (VEC_SIZE * 2)(%rax, %rcx), %rax + ret +-END (__memrchr_evex) ++ ++END(MEMRCHR) + #endif diff --git a/SOURCES/glibc-upstream-2.34-285.patch b/SOURCES/glibc-upstream-2.34-285.patch new file mode 100644 index 0000000..c3b0837 --- /dev/null +++ b/SOURCES/glibc-upstream-2.34-285.patch @@ -0,0 +1,645 @@ +commit b05bd59823bcedee281d3fd5bd4928698ea9d69d +Author: Noah Goldstein +Date: Mon Jun 6 21:11:32 2022 -0700 + + x86: Optimize memrchr-avx2.S + + The new code: + 1. prioritizes smaller user-arg lengths more. + 2. optimizes target placement more carefully + 3. reuses logic more + 4. fixes up various inefficiencies in the logic. The biggest + case here is the `lzcnt` logic for checking returns which + saves either a branch or multiple instructions. + + The total code size saving is: 306 bytes + Geometric Mean of all benchmarks New / Old: 0.760 + + Regressions: + There are some regressions. Particularly where the length (user arg + length) is large but the position of the match char is near the + beginning of the string (in first VEC). This case has roughly a + 10-20% regression. + + This is because the new logic gives the hot path for immediate matches + to shorter lengths (the more common input). This case has roughly + a 15-45% speedup. + + Full xcheck passes on x86_64. + Reviewed-by: H.J. Lu + + (cherry picked from commit af5306a735eb0966fdc2f8ccdafa8888e2df0c87) + +diff --git a/sysdeps/x86_64/multiarch/memrchr-avx2-rtm.S b/sysdeps/x86_64/multiarch/memrchr-avx2-rtm.S +index cea2d2a72db7406a..5e9beeeef2677c9f 100644 +--- a/sysdeps/x86_64/multiarch/memrchr-avx2-rtm.S ++++ b/sysdeps/x86_64/multiarch/memrchr-avx2-rtm.S +@@ -2,6 +2,7 @@ + # define MEMRCHR __memrchr_avx2_rtm + #endif + ++#define COND_VZEROUPPER COND_VZEROUPPER_XTEST + #define ZERO_UPPER_VEC_REGISTERS_RETURN \ + ZERO_UPPER_VEC_REGISTERS_RETURN_XTEST + +diff --git a/sysdeps/x86_64/multiarch/memrchr-avx2.S b/sysdeps/x86_64/multiarch/memrchr-avx2.S +index ac7370cb06e9a0fd..5f8e0be18cfe4fad 100644 +--- a/sysdeps/x86_64/multiarch/memrchr-avx2.S ++++ b/sysdeps/x86_64/multiarch/memrchr-avx2.S +@@ -21,340 +21,318 @@ + # include + + # ifndef MEMRCHR +-# define MEMRCHR __memrchr_avx2 ++# define MEMRCHR __memrchr_avx2 + # endif + + # ifndef VZEROUPPER +-# define VZEROUPPER vzeroupper ++# define VZEROUPPER vzeroupper + # endif + + # ifndef SECTION + # define SECTION(p) p##.avx + # endif + +-# define VEC_SIZE 32 ++# define VEC_SIZE 32 ++# define PAGE_SIZE 4096 ++ .section SECTION(.text), "ax", @progbits ++ENTRY(MEMRCHR) ++# ifdef __ILP32__ ++ /* Clear upper bits. 
*/ ++ and %RDX_LP, %RDX_LP ++# else ++ test %RDX_LP, %RDX_LP ++# endif ++ jz L(zero_0) + +- .section SECTION(.text),"ax",@progbits +-ENTRY (MEMRCHR) +- /* Broadcast CHAR to YMM0. */ + vmovd %esi, %xmm0 +- vpbroadcastb %xmm0, %ymm0 +- +- sub $VEC_SIZE, %RDX_LP +- jbe L(last_vec_or_less) +- +- add %RDX_LP, %RDI_LP +- +- /* Check the last VEC_SIZE bytes. */ +- vpcmpeqb (%rdi), %ymm0, %ymm1 +- vpmovmskb %ymm1, %eax +- testl %eax, %eax +- jnz L(last_vec_x0) ++ /* Get end pointer. Minus one for two reasons. 1) It is necessary for a ++ correct page cross check and 2) it correctly sets up end ptr to be ++ subtract by lzcnt aligned. */ ++ leaq -1(%rdx, %rdi), %rax + +- subq $(VEC_SIZE * 4), %rdi +- movl %edi, %ecx +- andl $(VEC_SIZE - 1), %ecx +- jz L(aligned_more) ++ vpbroadcastb %xmm0, %ymm0 + +- /* Align data for aligned loads in the loop. */ +- addq $VEC_SIZE, %rdi +- addq $VEC_SIZE, %rdx +- andq $-VEC_SIZE, %rdi +- subq %rcx, %rdx ++ /* Check if we can load 1x VEC without cross a page. */ ++ testl $(PAGE_SIZE - VEC_SIZE), %eax ++ jz L(page_cross) ++ ++ vpcmpeqb -(VEC_SIZE - 1)(%rax), %ymm0, %ymm1 ++ vpmovmskb %ymm1, %ecx ++ cmpq $VEC_SIZE, %rdx ++ ja L(more_1x_vec) ++ ++L(ret_vec_x0_test): ++ /* If ecx is zero (no matches) lzcnt will set it 32 (VEC_SIZE) which ++ will gurantee edx (len) is less than it. */ ++ lzcntl %ecx, %ecx ++ ++ /* Hoist vzeroupper (not great for RTM) to save code size. This allows ++ all logic for edx (len) <= VEC_SIZE to fit in first cache line. */ ++ COND_VZEROUPPER ++ cmpl %ecx, %edx ++ jle L(zero_0) ++ subq %rcx, %rax ++ ret + +- .p2align 4 +-L(aligned_more): +- subq $(VEC_SIZE * 4), %rdx +- jbe L(last_4x_vec_or_less) +- +- /* Check the last 4 * VEC_SIZE. Only one VEC_SIZE at a time +- since data is only aligned to VEC_SIZE. */ +- vpcmpeqb (VEC_SIZE * 3)(%rdi), %ymm0, %ymm1 +- vpmovmskb %ymm1, %eax +- testl %eax, %eax +- jnz L(last_vec_x3) +- +- vpcmpeqb (VEC_SIZE * 2)(%rdi), %ymm0, %ymm2 +- vpmovmskb %ymm2, %eax +- testl %eax, %eax +- jnz L(last_vec_x2) +- +- vpcmpeqb VEC_SIZE(%rdi), %ymm0, %ymm3 +- vpmovmskb %ymm3, %eax +- testl %eax, %eax +- jnz L(last_vec_x1) +- +- vpcmpeqb (%rdi), %ymm0, %ymm4 +- vpmovmskb %ymm4, %eax +- testl %eax, %eax +- jnz L(last_vec_x0) +- +- /* Align data to 4 * VEC_SIZE for loop with fewer branches. +- There are some overlaps with above if data isn't aligned +- to 4 * VEC_SIZE. */ +- movl %edi, %ecx +- andl $(VEC_SIZE * 4 - 1), %ecx +- jz L(loop_4x_vec) +- +- addq $(VEC_SIZE * 4), %rdi +- addq $(VEC_SIZE * 4), %rdx +- andq $-(VEC_SIZE * 4), %rdi +- subq %rcx, %rdx ++ /* Fits in aligning bytes of first cache line. */ ++L(zero_0): ++ xorl %eax, %eax ++ ret + +- .p2align 4 +-L(loop_4x_vec): +- /* Compare 4 * VEC at a time forward. */ +- subq $(VEC_SIZE * 4), %rdi +- subq $(VEC_SIZE * 4), %rdx +- jbe L(last_4x_vec_or_less) +- +- vmovdqa (%rdi), %ymm1 +- vmovdqa VEC_SIZE(%rdi), %ymm2 +- vmovdqa (VEC_SIZE * 2)(%rdi), %ymm3 +- vmovdqa (VEC_SIZE * 3)(%rdi), %ymm4 +- +- vpcmpeqb %ymm1, %ymm0, %ymm1 +- vpcmpeqb %ymm2, %ymm0, %ymm2 +- vpcmpeqb %ymm3, %ymm0, %ymm3 +- vpcmpeqb %ymm4, %ymm0, %ymm4 +- +- vpor %ymm1, %ymm2, %ymm5 +- vpor %ymm3, %ymm4, %ymm6 +- vpor %ymm5, %ymm6, %ymm5 +- +- vpmovmskb %ymm5, %eax +- testl %eax, %eax +- jz L(loop_4x_vec) +- +- /* There is a match. 
*/ +- vpmovmskb %ymm4, %eax +- testl %eax, %eax +- jnz L(last_vec_x3) +- +- vpmovmskb %ymm3, %eax +- testl %eax, %eax +- jnz L(last_vec_x2) +- +- vpmovmskb %ymm2, %eax +- testl %eax, %eax +- jnz L(last_vec_x1) +- +- vpmovmskb %ymm1, %eax +- bsrl %eax, %eax +- addq %rdi, %rax ++ .p2align 4,, 9 ++L(ret_vec_x0): ++ lzcntl %ecx, %ecx ++ subq %rcx, %rax + L(return_vzeroupper): + ZERO_UPPER_VEC_REGISTERS_RETURN + +- .p2align 4 +-L(last_4x_vec_or_less): +- addl $(VEC_SIZE * 4), %edx +- cmpl $(VEC_SIZE * 2), %edx +- jbe L(last_2x_vec) +- +- vpcmpeqb (VEC_SIZE * 3)(%rdi), %ymm0, %ymm1 +- vpmovmskb %ymm1, %eax +- testl %eax, %eax +- jnz L(last_vec_x3) +- +- vpcmpeqb (VEC_SIZE * 2)(%rdi), %ymm0, %ymm2 +- vpmovmskb %ymm2, %eax +- testl %eax, %eax +- jnz L(last_vec_x2) +- +- vpcmpeqb VEC_SIZE(%rdi), %ymm0, %ymm3 +- vpmovmskb %ymm3, %eax +- testl %eax, %eax +- jnz L(last_vec_x1_check) +- cmpl $(VEC_SIZE * 3), %edx +- jbe L(zero) +- +- vpcmpeqb (%rdi), %ymm0, %ymm4 +- vpmovmskb %ymm4, %eax +- testl %eax, %eax +- jz L(zero) +- bsrl %eax, %eax +- subq $(VEC_SIZE * 4), %rdx +- addq %rax, %rdx +- jl L(zero) +- addq %rdi, %rax +- VZEROUPPER_RETURN +- +- .p2align 4 ++ .p2align 4,, 10 ++L(more_1x_vec): ++ testl %ecx, %ecx ++ jnz L(ret_vec_x0) ++ ++ /* Align rax (string pointer). */ ++ andq $-VEC_SIZE, %rax ++ ++ /* Recompute remaining length after aligning. */ ++ movq %rax, %rdx ++ /* Need this comparison next no matter what. */ ++ vpcmpeqb -(VEC_SIZE)(%rax), %ymm0, %ymm1 ++ subq %rdi, %rdx ++ decq %rax ++ vpmovmskb %ymm1, %ecx ++ /* Fall through for short (hotter than length). */ ++ cmpq $(VEC_SIZE * 2), %rdx ++ ja L(more_2x_vec) + L(last_2x_vec): +- vpcmpeqb (VEC_SIZE * 3)(%rdi), %ymm0, %ymm1 +- vpmovmskb %ymm1, %eax +- testl %eax, %eax +- jnz L(last_vec_x3_check) + cmpl $VEC_SIZE, %edx +- jbe L(zero) +- +- vpcmpeqb (VEC_SIZE * 2)(%rdi), %ymm0, %ymm1 +- vpmovmskb %ymm1, %eax +- testl %eax, %eax +- jz L(zero) +- bsrl %eax, %eax +- subq $(VEC_SIZE * 2), %rdx +- addq %rax, %rdx +- jl L(zero) +- addl $(VEC_SIZE * 2), %eax +- addq %rdi, %rax +- VZEROUPPER_RETURN +- +- .p2align 4 +-L(last_vec_x0): +- bsrl %eax, %eax +- addq %rdi, %rax +- VZEROUPPER_RETURN ++ jbe L(ret_vec_x0_test) ++ ++ testl %ecx, %ecx ++ jnz L(ret_vec_x0) ++ ++ vpcmpeqb -(VEC_SIZE * 2 - 1)(%rax), %ymm0, %ymm1 ++ vpmovmskb %ymm1, %ecx ++ /* 64-bit lzcnt. This will naturally add 32 to position. */ ++ lzcntq %rcx, %rcx ++ COND_VZEROUPPER ++ cmpl %ecx, %edx ++ jle L(zero_0) ++ subq %rcx, %rax ++ ret + +- .p2align 4 +-L(last_vec_x1): +- bsrl %eax, %eax +- addl $VEC_SIZE, %eax +- addq %rdi, %rax +- VZEROUPPER_RETURN + +- .p2align 4 +-L(last_vec_x2): +- bsrl %eax, %eax +- addl $(VEC_SIZE * 2), %eax +- addq %rdi, %rax ++ /* Inexpensive place to put this regarding code size / target alignments ++ / ICache NLP. Necessary for 2-byte encoding of jump to page cross ++ case which in turn is necessary for hot path (len <= VEC_SIZE) to fit ++ in first cache line. */ ++L(page_cross): ++ movq %rax, %rsi ++ andq $-VEC_SIZE, %rsi ++ vpcmpeqb (%rsi), %ymm0, %ymm1 ++ vpmovmskb %ymm1, %ecx ++ /* Shift out negative alignment (because we are starting from endptr and ++ working backwards). */ ++ movl %eax, %r8d ++ /* notl because eax already has endptr - 1. (-x = ~(x - 1)). */ ++ notl %r8d ++ shlxl %r8d, %ecx, %ecx ++ cmpq %rdi, %rsi ++ ja L(more_1x_vec) ++ lzcntl %ecx, %ecx ++ COND_VZEROUPPER ++ cmpl %ecx, %edx ++ jle L(zero_0) ++ subq %rcx, %rax ++ ret ++ .p2align 4,, 11 ++L(ret_vec_x1): ++ /* This will naturally add 32 to position. 
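++	   (%ecx holds a 32-bit vpmovmskb mask, so the upper 32 bits of %rcx
++	   are zero and the 64-bit lzcnt yields 32 plus the 32-bit count,
++	   folding the VEC_SIZE offset into a single instruction.)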
*/ ++ lzcntq %rcx, %rcx ++ subq %rcx, %rax + VZEROUPPER_RETURN ++ .p2align 4,, 10 ++L(more_2x_vec): ++ testl %ecx, %ecx ++ jnz L(ret_vec_x0) + +- .p2align 4 +-L(last_vec_x3): +- bsrl %eax, %eax +- addl $(VEC_SIZE * 3), %eax +- addq %rdi, %rax +- ret ++ vpcmpeqb -(VEC_SIZE * 2 - 1)(%rax), %ymm0, %ymm1 ++ vpmovmskb %ymm1, %ecx ++ testl %ecx, %ecx ++ jnz L(ret_vec_x1) + +- .p2align 4 +-L(last_vec_x1_check): +- bsrl %eax, %eax +- subq $(VEC_SIZE * 3), %rdx +- addq %rax, %rdx +- jl L(zero) +- addl $VEC_SIZE, %eax +- addq %rdi, %rax +- VZEROUPPER_RETURN + +- .p2align 4 +-L(last_vec_x3_check): +- bsrl %eax, %eax +- subq $VEC_SIZE, %rdx +- addq %rax, %rdx +- jl L(zero) +- addl $(VEC_SIZE * 3), %eax +- addq %rdi, %rax +- VZEROUPPER_RETURN ++ /* Needed no matter what. */ ++ vpcmpeqb -(VEC_SIZE * 3 - 1)(%rax), %ymm0, %ymm1 ++ vpmovmskb %ymm1, %ecx + +- .p2align 4 +-L(zero): +- xorl %eax, %eax +- VZEROUPPER_RETURN ++ subq $(VEC_SIZE * 4), %rdx ++ ja L(more_4x_vec) ++ ++ cmpl $(VEC_SIZE * -1), %edx ++ jle L(ret_vec_x2_test) ++ ++L(last_vec): ++ testl %ecx, %ecx ++ jnz L(ret_vec_x2) ++ ++ /* Needed no matter what. */ ++ vpcmpeqb -(VEC_SIZE * 4 - 1)(%rax), %ymm0, %ymm1 ++ vpmovmskb %ymm1, %ecx ++ lzcntl %ecx, %ecx ++ subq $(VEC_SIZE * 3), %rax ++ COND_VZEROUPPER ++ subq %rcx, %rax ++ cmpq %rax, %rdi ++ ja L(zero_2) ++ ret + +- .p2align 4 +-L(null): ++ /* First in aligning bytes. */ ++L(zero_2): + xorl %eax, %eax + ret + +- .p2align 4 +-L(last_vec_or_less_aligned): +- movl %edx, %ecx ++ .p2align 4,, 4 ++L(ret_vec_x2_test): ++ lzcntl %ecx, %ecx ++ subq $(VEC_SIZE * 2), %rax ++ COND_VZEROUPPER ++ subq %rcx, %rax ++ cmpq %rax, %rdi ++ ja L(zero_2) ++ ret + +- vpcmpeqb (%rdi), %ymm0, %ymm1 + +- movl $1, %edx +- /* Support rdx << 32. */ +- salq %cl, %rdx +- subq $1, %rdx ++ .p2align 4,, 11 ++L(ret_vec_x2): ++ /* ecx must be non-zero. */ ++ bsrl %ecx, %ecx ++ leaq (VEC_SIZE * -3 + 1)(%rcx, %rax), %rax ++ VZEROUPPER_RETURN + +- vpmovmskb %ymm1, %eax ++ .p2align 4,, 14 ++L(ret_vec_x3): ++ /* ecx must be non-zero. */ ++ bsrl %ecx, %ecx ++ leaq (VEC_SIZE * -4 + 1)(%rcx, %rax), %rax ++ VZEROUPPER_RETURN + +- /* Remove the trailing bytes. */ +- andl %edx, %eax +- testl %eax, %eax +- jz L(zero) + +- bsrl %eax, %eax +- addq %rdi, %rax +- VZEROUPPER_RETURN + + .p2align 4 +-L(last_vec_or_less): +- addl $VEC_SIZE, %edx ++L(more_4x_vec): ++ testl %ecx, %ecx ++ jnz L(ret_vec_x2) + +- /* Check for zero length. */ +- testl %edx, %edx +- jz L(null) ++ vpcmpeqb -(VEC_SIZE * 4 - 1)(%rax), %ymm0, %ymm1 ++ vpmovmskb %ymm1, %ecx + +- movl %edi, %ecx +- andl $(VEC_SIZE - 1), %ecx +- jz L(last_vec_or_less_aligned) ++ testl %ecx, %ecx ++ jnz L(ret_vec_x3) + +- movl %ecx, %esi +- movl %ecx, %r8d +- addl %edx, %esi +- andq $-VEC_SIZE, %rdi ++ /* Check if near end before re-aligning (otherwise might do an ++ unnecissary loop iteration). */ ++ addq $-(VEC_SIZE * 4), %rax ++ cmpq $(VEC_SIZE * 4), %rdx ++ jbe L(last_4x_vec) + +- subl $VEC_SIZE, %esi +- ja L(last_vec_2x_aligned) ++ /* Align rax to (VEC_SIZE - 1). */ ++ orq $(VEC_SIZE * 4 - 1), %rax ++ movq %rdi, %rdx ++ /* Get endptr for loop in rdx. NB: Can't just do while rax > rdi because ++ lengths that overflow can be valid and break the comparison. */ ++ orq $(VEC_SIZE * 4 - 1), %rdx + +- /* Check the last VEC. */ +- vpcmpeqb (%rdi), %ymm0, %ymm1 +- vpmovmskb %ymm1, %eax +- +- /* Remove the leading and trailing bytes. */ +- sarl %cl, %eax +- movl %edx, %ecx ++ .p2align 4 ++L(loop_4x_vec): ++ /* Need this comparison next no matter what. 
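++	   Its result feeds the vpor reduction below, and L(loop_end)
++	   rechecks %ymm1 first on the match path.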
*/ ++ vpcmpeqb -(VEC_SIZE * 1 - 1)(%rax), %ymm0, %ymm1 ++ vpcmpeqb -(VEC_SIZE * 2 - 1)(%rax), %ymm0, %ymm2 ++ vpcmpeqb -(VEC_SIZE * 3 - 1)(%rax), %ymm0, %ymm3 ++ vpcmpeqb -(VEC_SIZE * 4 - 1)(%rax), %ymm0, %ymm4 + +- movl $1, %edx +- sall %cl, %edx +- subl $1, %edx ++ vpor %ymm1, %ymm2, %ymm2 ++ vpor %ymm3, %ymm4, %ymm4 ++ vpor %ymm2, %ymm4, %ymm4 ++ vpmovmskb %ymm4, %esi + +- andl %edx, %eax +- testl %eax, %eax +- jz L(zero) ++ testl %esi, %esi ++ jnz L(loop_end) + +- bsrl %eax, %eax +- addq %rdi, %rax +- addq %r8, %rax +- VZEROUPPER_RETURN ++ addq $(VEC_SIZE * -4), %rax ++ cmpq %rdx, %rax ++ jne L(loop_4x_vec) + +- .p2align 4 +-L(last_vec_2x_aligned): +- movl %esi, %ecx ++ subl %edi, %edx ++ incl %edx + +- /* Check the last VEC. */ +- vpcmpeqb VEC_SIZE(%rdi), %ymm0, %ymm1 ++L(last_4x_vec): ++ /* Used no matter what. */ ++ vpcmpeqb -(VEC_SIZE * 1 - 1)(%rax), %ymm0, %ymm1 ++ vpmovmskb %ymm1, %ecx + +- movl $1, %edx +- sall %cl, %edx +- subl $1, %edx ++ cmpl $(VEC_SIZE * 2), %edx ++ jbe L(last_2x_vec) + +- vpmovmskb %ymm1, %eax ++ testl %ecx, %ecx ++ jnz L(ret_vec_x0_end) + +- /* Remove the trailing bytes. */ +- andl %edx, %eax ++ vpcmpeqb -(VEC_SIZE * 2 - 1)(%rax), %ymm0, %ymm1 ++ vpmovmskb %ymm1, %ecx ++ testl %ecx, %ecx ++ jnz L(ret_vec_x1_end) + +- testl %eax, %eax +- jnz L(last_vec_x1) ++ /* Used no matter what. */ ++ vpcmpeqb -(VEC_SIZE * 3 - 1)(%rax), %ymm0, %ymm1 ++ vpmovmskb %ymm1, %ecx + +- /* Check the second last VEC. */ +- vpcmpeqb (%rdi), %ymm0, %ymm1 ++ cmpl $(VEC_SIZE * 3), %edx ++ ja L(last_vec) ++ ++ lzcntl %ecx, %ecx ++ subq $(VEC_SIZE * 2), %rax ++ COND_VZEROUPPER ++ subq %rcx, %rax ++ cmpq %rax, %rdi ++ jbe L(ret0) ++ xorl %eax, %eax ++L(ret0): ++ ret + +- movl %r8d, %ecx + +- vpmovmskb %ymm1, %eax ++ .p2align 4 ++L(loop_end): ++ vpmovmskb %ymm1, %ecx ++ testl %ecx, %ecx ++ jnz L(ret_vec_x0_end) ++ ++ vpmovmskb %ymm2, %ecx ++ testl %ecx, %ecx ++ jnz L(ret_vec_x1_end) ++ ++ vpmovmskb %ymm3, %ecx ++ /* Combine last 2 VEC matches. If ecx (VEC3) is zero (no CHAR in VEC3) ++ then it won't affect the result in esi (VEC4). If ecx is non-zero ++ then CHAR in VEC3 and bsrq will use that position. */ ++ salq $32, %rcx ++ orq %rsi, %rcx ++ bsrq %rcx, %rcx ++ leaq (VEC_SIZE * -4 + 1)(%rcx, %rax), %rax ++ VZEROUPPER_RETURN + +- /* Remove the leading bytes. Must use unsigned right shift for +- bsrl below. */ +- shrl %cl, %eax +- testl %eax, %eax +- jz L(zero) ++ .p2align 4,, 4 ++L(ret_vec_x1_end): ++ /* 64-bit version will automatically add 32 (VEC_SIZE). */ ++ lzcntq %rcx, %rcx ++ subq %rcx, %rax ++ VZEROUPPER_RETURN + +- bsrl %eax, %eax +- addq %rdi, %rax +- addq %r8, %rax ++ .p2align 4,, 4 ++L(ret_vec_x0_end): ++ lzcntl %ecx, %ecx ++ subq %rcx, %rax + VZEROUPPER_RETURN +-END (MEMRCHR) ++ ++ /* 2 bytes until next cache line. */ ++END(MEMRCHR) + #endif diff --git a/SOURCES/glibc-upstream-2.34-286.patch b/SOURCES/glibc-upstream-2.34-286.patch new file mode 100644 index 0000000..41a0188 --- /dev/null +++ b/SOURCES/glibc-upstream-2.34-286.patch @@ -0,0 +1,323 @@ +commit a910d7e164f1d9b8e77bbea35a2d2ab89a5e26cc +Author: Noah Goldstein +Date: Mon Jun 6 21:11:33 2022 -0700 + + x86: Shrink code size of memchr-avx2.S + + This is not meant as a performance optimization. The previous code was + far to liberal in aligning targets and wasted code size unnecissarily. + + The total code size saving is: 59 bytes + + There are no major changes in the benchmarks. + Geometric Mean of all benchmarks New / Old: 0.967 + + Full xcheck passes on x86_64. + Reviewed-by: H.J. 
Lu
+
+    (cherry picked from commit 6dcbb7d95dded20153b12d76d2f4e0ef0cda4f35)
+
+    x86: Fix page cross case in rawmemchr-avx2 [BZ #29234]
+
+    commit 6dcbb7d95dded20153b12d76d2f4e0ef0cda4f35
+    Author: Noah Goldstein
+    Date:   Mon Jun 6 21:11:33 2022 -0700
+
+        x86: Shrink code size of memchr-avx2.S
+
+    Changed how the page cross case aligned string (rdi) in
+    rawmemchr. This was incompatible with how
+    `L(cross_page_continue)` expected the pointer to be aligned and
+    would cause rawmemchr to read data that started before the
+    beginning of the string. What it would read was in valid memory
+    but could count CHAR matches resulting in an incorrect return
+    value.
+
+    This commit fixes that issue by essentially reverting the changes to
+    the L(page_cross) case as they didn't really matter.
+
+    Test cases added and all pass with the new code (and were confirmed
+    to fail with the old code).
+    Reviewed-by: H.J. Lu
+
+    (cherry picked from commit 2c9af8421d2b4a7fcce163e7bc81a118d22fd346)
+
+diff --git a/string/test-rawmemchr.c b/string/test-rawmemchr.c
+index 085098aba8fdbc13..327c0654e69e7669 100644
+--- a/string/test-rawmemchr.c
++++ b/string/test-rawmemchr.c
+@@ -18,6 +18,7 @@
+    . */
+
+ #include
++#include
+
+ #define TEST_MAIN
+ #define TEST_NAME "rawmemchr"
+@@ -51,13 +52,45 @@ do_one_test (impl_t *impl, const char *s, int c, char *exp_res)
+     }
+ }
+
++static void
++do_test_bz29234 (void)
++{
++  size_t i, j;
++  char *ptr_start;
++  char *buf = xmmap (0, 8192, PROT_READ | PROT_WRITE,
++		     MAP_PRIVATE | MAP_ANONYMOUS, -1);
++
++  memset (buf, -1, 8192);
++
++  ptr_start = buf + 4096 - 8;
++
++  /* Out of range matches before the start of a page.  */
++  memset (ptr_start - 8, 0x1, 8);
++
++  for (j = 0; j < 8; ++j)
++    {
++      for (i = 0; i < 128; ++i)
++	{
++	  ptr_start[i + j] = 0x1;
++
++	  FOR_EACH_IMPL (impl, 0)
++	    do_one_test (impl, (char *) (ptr_start + j), 0x1,
++			 ptr_start + i + j);
++
++	  ptr_start[i + j] = 0xff;
++	}
++    }
++
++  xmunmap (buf, 8192);
++}
++
+ static void
+ do_test (size_t align, size_t pos, size_t len, int seek_char)
+ {
+   size_t i;
+   char *result;
+
+-  align &= 7;
++  align &= getpagesize () - 1;
+   if (align + len >= page_size)
+     return;
+
+@@ -115,6 +148,13 @@ do_random_tests (void)
+ 	    }
+ 	}
+
++      if (align)
++	{
++	  p[align - 1] = seek_char;
++	  if (align > 4)
++	    p[align - 4] = seek_char;
++	}
++
+       assert (pos < len);
+       size_t r = random ();
+       if ((r & 31) == 0)
+@@ -130,6 +170,13 @@ do_random_tests (void)
+ 		 result, p);
+ 	  ret = 1;
+ 	}
++
++      if (align)
++	{
++	  p[align - 1] = seek_char;
++	  if (align > 4)
++	    p[align - 4] = seek_char;
++	}
+     }
+ }
+
+@@ -151,14 +198,22 @@ test_main (void)
+       do_test (i, 64, 256, 23);
+       do_test (0, 16 << i, 2048, 0);
+       do_test (i, 64, 256, 0);
++
++      do_test (getpagesize () - i, 64, 256, 23);
++      do_test (getpagesize () - i, 64, 256, 0);
+     }
+   for (i = 1; i < 32; ++i)
+     {
+       do_test (0, i, i + 1, 23);
+       do_test (0, i, i + 1, 0);
++
++      do_test (getpagesize () - 7, i, i + 1, 23);
++      do_test (getpagesize () - i / 2, i, i + 1, 23);
++      do_test (getpagesize () - i, i, i + 1, 23);
+     }
+
+   do_random_tests ();
++  do_test_bz29234 ();
+   return ret;
+ }
+
+diff --git a/sysdeps/x86_64/multiarch/memchr-avx2-rtm.S b/sysdeps/x86_64/multiarch/memchr-avx2-rtm.S
+index 87b076c7c403ba85..c4d71938c5a3ed24 100644
+--- a/sysdeps/x86_64/multiarch/memchr-avx2-rtm.S
++++ b/sysdeps/x86_64/multiarch/memchr-avx2-rtm.S
+@@ -2,6 +2,7 @@
+ # define MEMCHR __memchr_avx2_rtm
+ #endif
+
++#define COND_VZEROUPPER COND_VZEROUPPER_XTEST
+ #define ZERO_UPPER_VEC_REGISTERS_RETURN \
+ 
ZERO_UPPER_VEC_REGISTERS_RETURN_XTEST + +diff --git a/sysdeps/x86_64/multiarch/memchr-avx2.S b/sysdeps/x86_64/multiarch/memchr-avx2.S +index afdb95650232fdac..9e0b7dd1f4fe9909 100644 +--- a/sysdeps/x86_64/multiarch/memchr-avx2.S ++++ b/sysdeps/x86_64/multiarch/memchr-avx2.S +@@ -57,7 +57,7 @@ + # define CHAR_PER_VEC (VEC_SIZE / CHAR_SIZE) + + .section SECTION(.text),"ax",@progbits +-ENTRY (MEMCHR) ++ENTRY_P2ALIGN (MEMCHR, 5) + # ifndef USE_AS_RAWMEMCHR + /* Check for zero length. */ + # ifdef __ILP32__ +@@ -87,12 +87,14 @@ ENTRY (MEMCHR) + # endif + testl %eax, %eax + jz L(aligned_more) +- tzcntl %eax, %eax ++ bsfl %eax, %eax + addq %rdi, %rax +- VZEROUPPER_RETURN ++L(return_vzeroupper): ++ ZERO_UPPER_VEC_REGISTERS_RETURN ++ + + # ifndef USE_AS_RAWMEMCHR +- .p2align 5 ++ .p2align 4 + L(first_vec_x0): + /* Check if first match was before length. */ + tzcntl %eax, %eax +@@ -100,58 +102,31 @@ L(first_vec_x0): + /* NB: Multiply length by 4 to get byte count. */ + sall $2, %edx + # endif +- xorl %ecx, %ecx ++ COND_VZEROUPPER ++ /* Use branch instead of cmovcc so L(first_vec_x0) fits in one fetch ++ block. branch here as opposed to cmovcc is not that costly. Common ++ usage of memchr is to check if the return was NULL (if string was ++ known to contain CHAR user would use rawmemchr). This branch will be ++ highly correlated with the user branch and can be used by most ++ modern branch predictors to predict the user branch. */ + cmpl %eax, %edx +- leaq (%rdi, %rax), %rax +- cmovle %rcx, %rax +- VZEROUPPER_RETURN +- +-L(null): +- xorl %eax, %eax +- ret +-# endif +- .p2align 4 +-L(cross_page_boundary): +- /* Save pointer before aligning as its original value is +- necessary for computer return address if byte is found or +- adjusting length if it is not and this is memchr. */ +- movq %rdi, %rcx +- /* Align data to VEC_SIZE - 1. ALGN_PTR_REG is rcx for memchr +- and rdi for rawmemchr. */ +- orq $(VEC_SIZE - 1), %ALGN_PTR_REG +- VPCMPEQ -(VEC_SIZE - 1)(%ALGN_PTR_REG), %ymm0, %ymm1 +- vpmovmskb %ymm1, %eax +-# ifndef USE_AS_RAWMEMCHR +- /* Calculate length until end of page (length checked for a +- match). */ +- leaq 1(%ALGN_PTR_REG), %rsi +- subq %RRAW_PTR_REG, %rsi +-# ifdef USE_AS_WMEMCHR +- /* NB: Divide bytes by 4 to get wchar_t count. */ +- shrl $2, %esi +-# endif +-# endif +- /* Remove the leading bytes. */ +- sarxl %ERAW_PTR_REG, %eax, %eax +-# ifndef USE_AS_RAWMEMCHR +- /* Check the end of data. */ +- cmpq %rsi, %rdx +- jbe L(first_vec_x0) ++ jle L(null) ++ addq %rdi, %rax ++ ret + # endif +- testl %eax, %eax +- jz L(cross_page_continue) +- tzcntl %eax, %eax +- addq %RRAW_PTR_REG, %rax +-L(return_vzeroupper): +- ZERO_UPPER_VEC_REGISTERS_RETURN + +- .p2align 4 ++ .p2align 4,, 10 + L(first_vec_x1): +- tzcntl %eax, %eax ++ bsfl %eax, %eax + incq %rdi + addq %rdi, %rax + VZEROUPPER_RETURN +- ++# ifndef USE_AS_RAWMEMCHR ++ /* First in aligning bytes here. */ ++L(null): ++ xorl %eax, %eax ++ ret ++# endif + .p2align 4 + L(first_vec_x2): + tzcntl %eax, %eax +@@ -340,7 +315,7 @@ L(first_vec_x1_check): + incq %rdi + addq %rdi, %rax + VZEROUPPER_RETURN +- .p2align 4 ++ .p2align 4,, 6 + L(set_zero_end): + xorl %eax, %eax + VZEROUPPER_RETURN +@@ -428,5 +403,39 @@ L(last_vec_x3): + VZEROUPPER_RETURN + # endif + ++ .p2align 4 ++L(cross_page_boundary): ++ /* Save pointer before aligning as its original value is necessary for ++ computer return address if byte is found or adjusting length if it ++ is not and this is memchr. */ ++ movq %rdi, %rcx ++ /* Align data to VEC_SIZE - 1. 
ALGN_PTR_REG is rcx for memchr
++	   and rdi for rawmemchr.  */
++	orq	$(VEC_SIZE - 1), %ALGN_PTR_REG
++	VPCMPEQ	-(VEC_SIZE - 1)(%ALGN_PTR_REG), %ymm0, %ymm1
++	vpmovmskb %ymm1, %eax
++# ifndef USE_AS_RAWMEMCHR
++	/* Calculate length until end of page (length checked for a match).  */
++	leaq	1(%ALGN_PTR_REG), %rsi
++	subq	%RRAW_PTR_REG, %rsi
++# ifdef USE_AS_WMEMCHR
++	/* NB: Divide bytes by 4 to get wchar_t count.  */
++	shrl	$2, %esi
++# endif
++# endif
++	/* Remove the leading bytes.  */
++	sarxl	%ERAW_PTR_REG, %eax, %eax
++# ifndef USE_AS_RAWMEMCHR
++	/* Check the end of data.  */
++	cmpq	%rsi, %rdx
++	jbe	L(first_vec_x0)
++# endif
++	testl	%eax, %eax
++	jz	L(cross_page_continue)
++	bsfl	%eax, %eax
++	addq	%RRAW_PTR_REG, %rax
++	VZEROUPPER_RETURN
++
++
+ END (MEMCHR)
+ #endif
diff --git a/SOURCES/glibc-upstream-2.34-287.patch b/SOURCES/glibc-upstream-2.34-287.patch
new file mode 100644
index 0000000..083be9d
--- /dev/null
+++ b/SOURCES/glibc-upstream-2.34-287.patch
@@ -0,0 +1,130 @@
+commit 3c87383a20daff9a230439e31b778716bfed4d8b
+Author: Noah Goldstein
+Date:   Mon Jun 6 21:11:34 2022 -0700
+
+    x86: Shrink code size of memchr-evex.S
+
+    This is not meant as a performance optimization. The previous code was
+    far too liberal in aligning targets and wasted code size unnecessarily.
+
+    The total code size saving is: 64 bytes
+
+    There are no non-negligible changes in the benchmarks.
+    Geometric Mean of all benchmarks New / Old: 1.000
+
+    Full xcheck passes on x86_64.
+    Reviewed-by: H.J. Lu
+
+    (cherry picked from commit 56da3fe1dd075285fa8186d44b3c28e68c687e62)
+
+diff --git a/sysdeps/x86_64/multiarch/memchr-evex.S b/sysdeps/x86_64/multiarch/memchr-evex.S
+index 4d0ed6d136f099e1..68381c99a4948134 100644
+--- a/sysdeps/x86_64/multiarch/memchr-evex.S
++++ b/sysdeps/x86_64/multiarch/memchr-evex.S
+@@ -88,7 +88,7 @@
+ # define PAGE_SIZE 4096
+ 
+ 	.section SECTION(.text),"ax",@progbits
+-ENTRY (MEMCHR)
++ENTRY_P2ALIGN (MEMCHR, 6)
+ # ifndef USE_AS_RAWMEMCHR
+ 	/* Check for zero length.  */
+ 	test	%RDX_LP, %RDX_LP
+@@ -131,22 +131,24 @@ L(zero):
+ 	xorl	%eax, %eax
+ 	ret
+ 
+-	.p2align 5
++	.p2align 4
+ L(first_vec_x0):
+-	/* Check if first match was before length.  */
+-	tzcntl	%eax, %eax
+-	xorl	%ecx, %ecx
+-	cmpl	%eax, %edx
+-	leaq	(%rdi, %rax, CHAR_SIZE), %rax
+-	cmovle	%rcx, %rax
++	/* Check if first match was before length. NB: tzcnt has false data-
++	   dependency on destination. eax already had a data-dependency on esi
++	   so this should have no affect here.  */
++	tzcntl	%eax, %esi
++# ifdef USE_AS_WMEMCHR
++	leaq	(%rdi, %rsi, CHAR_SIZE), %rdi
++# else
++	addq	%rsi, %rdi
++# endif
++	xorl	%eax, %eax
++	cmpl	%esi, %edx
++	cmovg	%rdi, %rax
+ 	ret
+-# else
+-	/* NB: first_vec_x0 is 17 bytes which will leave
+-	   cross_page_boundary (which is relatively cold) close enough
+-	   to ideal alignment. So only realign L(cross_page_boundary) if
+-	   rawmemchr.  */
+-	.p2align 4
+ # endif
++
++	.p2align 4
+ L(cross_page_boundary):
+ 	/* Save pointer before aligning as its original value is
+ 	   necessary for computer return address if byte is found or
+@@ -400,10 +402,14 @@ L(last_2x_vec):
+ L(zero_end):
+ 	ret
+ 
++L(set_zero_end):
++	xorl	%eax, %eax
++	ret
+ 
+ 	.p2align 4
+ L(first_vec_x1_check):
+-	tzcntl	%eax, %eax
++	/* eax must be non-zero. Use bsfl to save code size.  */
++	bsfl	%eax, %eax
+ 	/* Adjust length.  */
+ 	subl	$-(CHAR_PER_VEC * 4), %edx
+ 	/* Check if match within remaining length.  */
+@@ -412,9 +418,6 @@ L(first_vec_x1_check):
+ 	/* NB: Multiply bytes by CHAR_SIZE to get the wchar_t count. 
*/ + leaq VEC_SIZE(%rdi, %rax, CHAR_SIZE), %rax + ret +-L(set_zero_end): +- xorl %eax, %eax +- ret + + .p2align 4 + L(loop_4x_vec_end): +@@ -464,7 +467,7 @@ L(loop_4x_vec_end): + # endif + ret + +- .p2align 4 ++ .p2align 4,, 10 + L(last_vec_x1_return): + tzcntl %eax, %eax + # if defined USE_AS_WMEMCHR || RET_OFFSET != 0 +@@ -496,6 +499,7 @@ L(last_vec_x3_return): + # endif + + # ifndef USE_AS_RAWMEMCHR ++ .p2align 4,, 5 + L(last_4x_vec_or_less_cmpeq): + VPCMP $0, (VEC_SIZE * 5)(%rdi), %YMMMATCH, %k0 + kmovd %k0, %eax +@@ -546,7 +550,7 @@ L(last_4x_vec): + # endif + andl %ecx, %eax + jz L(zero_end2) +- tzcntl %eax, %eax ++ bsfl %eax, %eax + leaq (VEC_SIZE * 4)(%rdi, %rax, CHAR_SIZE), %rax + L(zero_end2): + ret +@@ -562,6 +566,6 @@ L(last_vec_x3): + leaq (VEC_SIZE * 3)(%rdi, %rax, CHAR_SIZE), %rax + ret + # endif +- ++ /* 7 bytes from next cache line. */ + END (MEMCHR) + #endif diff --git a/SOURCES/glibc-upstream-2.34-288.patch b/SOURCES/glibc-upstream-2.34-288.patch new file mode 100644 index 0000000..6565455 --- /dev/null +++ b/SOURCES/glibc-upstream-2.34-288.patch @@ -0,0 +1,33 @@ +commit 820504e3edd7276bf869d543ad5b57187ff9c9b6 +Author: Noah Goldstein +Date: Fri Jun 3 18:52:37 2022 -0500 + + x86: ZERO_UPPER_VEC_REGISTERS_RETURN_XTEST expect no transactions + + Give fall-through path to `vzeroupper` and taken-path to `vzeroall`. + + Generally even on machines with RTM the expectation is the + string-library functions will not be called in transactions. + Reviewed-by: H.J. Lu + + (cherry picked from commit c28db9cb29a7d6cf3ce08fd8445e6b7dea03f35b) + +diff --git a/sysdeps/x86_64/sysdep.h b/sysdeps/x86_64/sysdep.h +index 93e44be22e2275f1..04478b097cdffe20 100644 +--- a/sysdeps/x86_64/sysdep.h ++++ b/sysdeps/x86_64/sysdep.h +@@ -99,11 +99,11 @@ lose: \ + to avoid RTM abort triggered by VZEROUPPER inside transactionally. */ + #define ZERO_UPPER_VEC_REGISTERS_RETURN_XTEST \ + xtest; \ +- jz 1f; \ +- vzeroall; \ ++ jnz 1f; \ ++ vzeroupper; \ + ret; \ + 1: \ +- vzeroupper; \ ++ vzeroall; \ + ret + + /* Can be used to replace vzeroupper that is not directly before a diff --git a/SOURCES/glibc-upstream-2.34-289.patch b/SOURCES/glibc-upstream-2.34-289.patch new file mode 100644 index 0000000..fe9c335 --- /dev/null +++ b/SOURCES/glibc-upstream-2.34-289.patch @@ -0,0 +1,41 @@ +commit fc54e1fae854e6ee6361cd4ddf900c36fce8158e +Author: Noah Goldstein +Date: Wed Jun 8 21:16:51 2022 -0700 + + x86: Align varshift table to 32-bytes + + This ensures the load will never split a cache line. + + (cherry picked from commit 0f91811333f23b61cf681cab2704b35a0a073b97) + +diff --git a/sysdeps/x86_64/multiarch/varshift.c b/sysdeps/x86_64/multiarch/varshift.c +index 45267b0a6823459a..1f563542666bc4f1 100644 +--- a/sysdeps/x86_64/multiarch/varshift.c ++++ b/sysdeps/x86_64/multiarch/varshift.c +@@ -16,9 +16,10 @@ + License along with the GNU C Library; if not, see + . 
*/ + +-#include "varshift.h" ++#include + +-const int8_t ___m128i_shift_right[31] attribute_hidden = ++const int8_t ___m128i_shift_right[31] attribute_hidden ++ __attribute__((aligned(32))) = + { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 +diff --git a/sysdeps/x86_64/multiarch/varshift.h b/sysdeps/x86_64/multiarch/varshift.h +index 32f2173dd2a95b3a..745d48fa7c775136 100644 +--- a/sysdeps/x86_64/multiarch/varshift.h ++++ b/sysdeps/x86_64/multiarch/varshift.h +@@ -19,7 +19,8 @@ + #include + #include + +-extern const int8_t ___m128i_shift_right[31] attribute_hidden; ++extern const int8_t ___m128i_shift_right[31] attribute_hidden ++ __attribute__ ((aligned (32))); + + static __inline__ __m128i + __m128i_shift_right (__m128i value, unsigned long int offset) diff --git a/SOURCES/glibc-upstream-2.34-290.patch b/SOURCES/glibc-upstream-2.34-290.patch new file mode 100644 index 0000000..326109c --- /dev/null +++ b/SOURCES/glibc-upstream-2.34-290.patch @@ -0,0 +1,56 @@ +commit 6e008c884dad5a25f91085c68d044bb5e2d63761 +Author: Noah Goldstein +Date: Tue Jun 14 13:50:11 2022 -0700 + + x86: Fix misordered logic for setting `rep_movsb_stop_threshold` + + Move the setting of `rep_movsb_stop_threshold` to after the tunables + have been collected so that the `rep_movsb_stop_threshold` (which + is used to redirect control flow to the non_temporal case) will + use any user value for `non_temporal_threshold` (set using + glibc.cpu.x86_non_temporal_threshold) + + (cherry picked from commit 035591551400cfc810b07244a015c9411e8bff7c) + +diff --git a/sysdeps/x86/dl-cacheinfo.h b/sysdeps/x86/dl-cacheinfo.h +index 2e43e67e4f4037d3..560bf260e8fbd7bf 100644 +--- a/sysdeps/x86/dl-cacheinfo.h ++++ b/sysdeps/x86/dl-cacheinfo.h +@@ -898,18 +898,6 @@ dl_init_cacheinfo (struct cpu_features *cpu_features) + if (CPU_FEATURE_USABLE_P (cpu_features, FSRM)) + rep_movsb_threshold = 2112; + +- unsigned long int rep_movsb_stop_threshold; +- /* ERMS feature is implemented from AMD Zen3 architecture and it is +- performing poorly for data above L2 cache size. Henceforth, adding +- an upper bound threshold parameter to limit the usage of Enhanced +- REP MOVSB operations and setting its value to L2 cache size. */ +- if (cpu_features->basic.kind == arch_kind_amd) +- rep_movsb_stop_threshold = core; +- /* Setting the upper bound of ERMS to the computed value of +- non-temporal threshold for architectures other than AMD. */ +- else +- rep_movsb_stop_threshold = non_temporal_threshold; +- + /* The default threshold to use Enhanced REP STOSB. */ + unsigned long int rep_stosb_threshold = 2048; + +@@ -951,6 +939,18 @@ dl_init_cacheinfo (struct cpu_features *cpu_features) + SIZE_MAX); + #endif + ++ unsigned long int rep_movsb_stop_threshold; ++ /* ERMS feature is implemented from AMD Zen3 architecture and it is ++ performing poorly for data above L2 cache size. Henceforth, adding ++ an upper bound threshold parameter to limit the usage of Enhanced ++ REP MOVSB operations and setting its value to L2 cache size. */ ++ if (cpu_features->basic.kind == arch_kind_amd) ++ rep_movsb_stop_threshold = core; ++ /* Setting the upper bound of ERMS to the computed value of ++ non-temporal threshold for architectures other than AMD. 
*/
++  else
++    rep_movsb_stop_threshold = non_temporal_threshold;
++
+   cpu_features->data_cache_size = data;
+   cpu_features->shared_cache_size = shared;
+   cpu_features->non_temporal_threshold = non_temporal_threshold;
diff --git a/SOURCES/glibc-upstream-2.34-291.patch b/SOURCES/glibc-upstream-2.34-291.patch
new file mode 100644
index 0000000..849476f
--- /dev/null
+++ b/SOURCES/glibc-upstream-2.34-291.patch
@@ -0,0 +1,38 @@
+commit 9d50e162eef88e1f870a941b0a973060e984e7ca
+Author: Noah Goldstein
+Date:   Tue Jun 14 15:37:28 2022 -0700
+
+    x86: Add sse42 implementation to strcmp's ifunc
+
+    This has been missing since the ifuncs were added.
+
+    The performance of SSE4.2 is preferable to SSE2.
+
+    Measured on Tigerlake with N = 20 runs.
+    Geometric Mean of all benchmarks SSE4.2 / SSE2: 0.906
+
+    (cherry picked from commit ff439c47173565fbff4f0f78d07b0f14e4a7db05)
+
+diff --git a/sysdeps/x86_64/multiarch/strcmp.c b/sysdeps/x86_64/multiarch/strcmp.c
+index 7c2901bf44456259..b457fb4c150e4407 100644
+--- a/sysdeps/x86_64/multiarch/strcmp.c
++++ b/sysdeps/x86_64/multiarch/strcmp.c
+@@ -29,6 +29,7 @@
+ extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
+ extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2_unaligned) attribute_hidden;
+ extern __typeof (REDIRECT_NAME) OPTIMIZE (ssse3) attribute_hidden;
++extern __typeof (REDIRECT_NAME) OPTIMIZE (sse42) attribute_hidden;
+ extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden;
+ extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_rtm) attribute_hidden;
+ extern __typeof (REDIRECT_NAME) OPTIMIZE (evex) attribute_hidden;
+@@ -53,6 +54,10 @@ IFUNC_SELECTOR (void)
+       return OPTIMIZE (avx2);
+     }
+ 
++  if (CPU_FEATURE_USABLE_P (cpu_features, SSE4_2)
++      && !CPU_FEATURES_ARCH_P (cpu_features, Slow_SSE4_2))
++    return OPTIMIZE (sse42);
++
+   if (CPU_FEATURES_ARCH_P (cpu_features, Fast_Unaligned_Load))
+     return OPTIMIZE (sse2_unaligned);
+ 
diff --git a/SOURCES/glibc-upstream-2.34-292.patch b/SOURCES/glibc-upstream-2.34-292.patch
new file mode 100644
index 0000000..492f541
--- /dev/null
+++ b/SOURCES/glibc-upstream-2.34-292.patch
@@ -0,0 +1,54 @@
+commit 94b0dc9419bd038cb85b364a7556569386c31741
+Author: Noah Goldstein
+Date:   Wed Jun 15 10:41:29 2022 -0700
+
+    x86: Add bounds `x86_non_temporal_threshold`
+
+    The lower-bound (16448) and upper-bound (SIZE_MAX / 16) are assumed
+    by memmove-vec-unaligned-erms.
+
+    The lower-bound is needed because memmove-vec-unaligned-erms unrolls
+    the loop aggressively in the L(large_memset_4x) case.
+
+    The upper-bound is needed because memmove-vec-unaligned-erms
+    right-shifts the value of `x86_non_temporal_threshold` by
+    LOG_4X_MEMCPY_THRESH (4) which without a bound may overflow.
+
+    The lack of lower-bound can be a correctness issue. The lack of
+    upper-bound cannot.
+ + (cherry picked from commit b446822b6ae4e8149902a78cdd4a886634ad6321) + +diff --git a/manual/tunables.texi b/manual/tunables.texi +index 28ff502990c2a10f..5ab3212f34e3dc37 100644 +--- a/manual/tunables.texi ++++ b/manual/tunables.texi +@@ -47,7 +47,7 @@ glibc.malloc.mxfast: 0x0 (min: 0x0, max: 0xffffffffffffffff) + glibc.elision.skip_lock_busy: 3 (min: -2147483648, max: 2147483647) + glibc.malloc.top_pad: 0x0 (min: 0x0, max: 0xffffffffffffffff) + glibc.cpu.x86_rep_stosb_threshold: 0x800 (min: 0x1, max: 0xffffffffffffffff) +-glibc.cpu.x86_non_temporal_threshold: 0xc0000 (min: 0x0, max: 0xffffffffffffffff) ++glibc.cpu.x86_non_temporal_threshold: 0xc0000 (min: 0x4040, max: 0x0fffffffffffffff) + glibc.cpu.x86_shstk: + glibc.cpu.hwcap_mask: 0x6 (min: 0x0, max: 0xffffffffffffffff) + glibc.malloc.mmap_max: 0 (min: -2147483648, max: 2147483647) +diff --git a/sysdeps/x86/dl-cacheinfo.h b/sysdeps/x86/dl-cacheinfo.h +index 560bf260e8fbd7bf..8f85f70858413ebe 100644 +--- a/sysdeps/x86/dl-cacheinfo.h ++++ b/sysdeps/x86/dl-cacheinfo.h +@@ -931,8 +931,14 @@ dl_init_cacheinfo (struct cpu_features *cpu_features) + + TUNABLE_SET_WITH_BOUNDS (x86_data_cache_size, data, 0, SIZE_MAX); + TUNABLE_SET_WITH_BOUNDS (x86_shared_cache_size, shared, 0, SIZE_MAX); ++ /* SIZE_MAX >> 4 because memmove-vec-unaligned-erms right-shifts the value of ++ 'x86_non_temporal_threshold' by `LOG_4X_MEMCPY_THRESH` (4) and it is best ++ if that operation cannot overflow. Minimum of 0x4040 (16448) because the ++ L(large_memset_4x) loops need 64-byte to cache align and enough space for ++ at least 1 iteration of 4x PAGE_SIZE unrolled loop. Both values are ++ reflected in the manual. */ + TUNABLE_SET_WITH_BOUNDS (x86_non_temporal_threshold, non_temporal_threshold, +- 0, SIZE_MAX); ++ 0x4040, SIZE_MAX >> 4); + TUNABLE_SET_WITH_BOUNDS (x86_rep_movsb_threshold, rep_movsb_threshold, + minimum_rep_movsb_threshold, SIZE_MAX); + TUNABLE_SET_WITH_BOUNDS (x86_rep_stosb_threshold, rep_stosb_threshold, 1, diff --git a/SOURCES/glibc-upstream-2.34-293.patch b/SOURCES/glibc-upstream-2.34-293.patch new file mode 100644 index 0000000..81a4e0e --- /dev/null +++ b/SOURCES/glibc-upstream-2.34-293.patch @@ -0,0 +1,88 @@ +commit ba1c3f23d9ba63c38333116eec6043c471c378c4 +Author: Noah Goldstein +Date: Wed Jun 15 10:41:28 2022 -0700 + + x86: Cleanup bounds checking in large memcpy case + + 1. Fix incorrect lower-bound threshold in L(large_memcpy_2x). + Previously was using `__x86_rep_movsb_threshold` and should + have been using `__x86_shared_non_temporal_threshold`. + + 2. Avoid reloading __x86_shared_non_temporal_threshold before + the L(large_memcpy_4x) bounds check. + + 3. Document the second bounds check for L(large_memcpy_4x) + more clearly. + + (cherry picked from commit 89a25c6f64746732b87eaf433af0964b564d4a92) + +diff --git a/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S b/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S +index 7b27cbdda5fb99f7..618d46d8ce28828c 100644 +--- a/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S ++++ b/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S +@@ -118,7 +118,13 @@ + # define LARGE_LOAD_SIZE (VEC_SIZE * 4) + #endif + +-/* Amount to shift rdx by to compare for memcpy_large_4x. */ ++/* Amount to shift __x86_shared_non_temporal_threshold by for ++ bound for memcpy_large_4x. This is essentially use to to ++ indicate that the copy is far beyond the scope of L3 ++ (assuming no user config x86_non_temporal_threshold) and to ++ use a more aggressively unrolled loop. 
NB: before ++ increasing the value also update initialization of ++ x86_non_temporal_threshold. */ + #ifndef LOG_4X_MEMCPY_THRESH + # define LOG_4X_MEMCPY_THRESH 4 + #endif +@@ -724,9 +730,14 @@ L(skip_short_movsb_check): + .p2align 4,, 10 + #if (defined USE_MULTIARCH || VEC_SIZE == 16) && IS_IN (libc) + L(large_memcpy_2x_check): +- cmp __x86_rep_movsb_threshold(%rip), %RDX_LP +- jb L(more_8x_vec_check) ++ /* Entry from L(large_memcpy_2x) has a redundant load of ++ __x86_shared_non_temporal_threshold(%rip). L(large_memcpy_2x) ++ is only use for the non-erms memmove which is generally less ++ common. */ + L(large_memcpy_2x): ++ mov __x86_shared_non_temporal_threshold(%rip), %R11_LP ++ cmp %R11_LP, %RDX_LP ++ jb L(more_8x_vec_check) + /* To reach this point it is impossible for dst > src and + overlap. Remaining to check is src > dst and overlap. rcx + already contains dst - src. Negate rcx to get src - dst. If +@@ -774,18 +785,21 @@ L(large_memcpy_2x): + /* ecx contains -(dst - src). not ecx will return dst - src - 1 + which works for testing aliasing. */ + notl %ecx ++ movq %rdx, %r10 + testl $(PAGE_SIZE - VEC_SIZE * 8), %ecx + jz L(large_memcpy_4x) + +- movq %rdx, %r10 +- shrq $LOG_4X_MEMCPY_THRESH, %r10 +- cmp __x86_shared_non_temporal_threshold(%rip), %r10 ++ /* r11 has __x86_shared_non_temporal_threshold. Shift it left ++ by LOG_4X_MEMCPY_THRESH to get L(large_memcpy_4x) threshold. ++ */ ++ shlq $LOG_4X_MEMCPY_THRESH, %r11 ++ cmp %r11, %rdx + jae L(large_memcpy_4x) + + /* edx will store remainder size for copying tail. */ + andl $(PAGE_SIZE * 2 - 1), %edx + /* r10 stores outer loop counter. */ +- shrq $((LOG_PAGE_SIZE + 1) - LOG_4X_MEMCPY_THRESH), %r10 ++ shrq $(LOG_PAGE_SIZE + 1), %r10 + /* Copy 4x VEC at a time from 2 pages. */ + .p2align 4 + L(loop_large_memcpy_2x_outer): +@@ -850,7 +864,6 @@ L(large_memcpy_2x_end): + + .p2align 4 + L(large_memcpy_4x): +- movq %rdx, %r10 + /* edx will store remainder size for copying tail. */ + andl $(PAGE_SIZE * 4 - 1), %edx + /* r10 stores outer loop counter. */ diff --git a/SOURCES/glibc-upstream-2.34-294.patch b/SOURCES/glibc-upstream-2.34-294.patch new file mode 100644 index 0000000..a7bd391 --- /dev/null +++ b/SOURCES/glibc-upstream-2.34-294.patch @@ -0,0 +1,27 @@ +commit c51d8d383cfb92142b86d8d1822159f3bea10d16 +Author: Noah Goldstein +Date: Thu Jun 16 15:01:08 2022 -0700 + + x86: Add BMI1/BMI2 checks for ISA_V3 check + + BMI1/BMI2 are part of the ISA V3 requirements: + https://en.wikipedia.org/wiki/X86-64 + + And defined by GCC when building with `-march=x86-64-v3` + + (cherry picked from commit 8da9f346cb2051844348785b8a932ec44489e0b7) + +diff --git a/sysdeps/x86/isa-level.c b/sysdeps/x86/isa-level.c +index 49ef4aa6122072cf..07815381122c94c3 100644 +--- a/sysdeps/x86/isa-level.c ++++ b/sysdeps/x86/isa-level.c +@@ -47,7 +47,8 @@ + # endif + + # if ISA_V2 && defined __AVX__ && defined __AVX2__ && defined __F16C__ \ +- && defined __FMA__ && defined __LZCNT__ && defined HAVE_X86_MOVBE ++ && defined __FMA__ && defined __LZCNT__ && defined HAVE_X86_MOVBE \ ++ && defined __BMI__ && defined __BMI2__ + /* NB: ISAs in x86-64 ISA level v3 are used. 
*/
+ #  define ISA_V3 GNU_PROPERTY_X86_ISA_1_V3
+ # else
diff --git a/SOURCES/glibc-upstream-2.34-295.patch b/SOURCES/glibc-upstream-2.34-295.patch
new file mode 100644
index 0000000..88d89e8
--- /dev/null
+++ b/SOURCES/glibc-upstream-2.34-295.patch
@@ -0,0 +1,28 @@
+commit d201c59177b98946d7f80145e7b4d02991d04805
+Author: Noah Goldstein
+Date:   Fri Jun 24 09:42:12 2022 -0700
+
+    x86: Align entry for memrchr to 64-bytes.
+
+    The function was tuned around 64-byte entry alignment and performs
+    better for all sizes with it.
+
+    As well, different code paths were explicitly written to touch the
+    minimum number of cache lines, i.e. sizes <= 32 touch only the entry
+    cache line.
+
+    (cherry picked from commit 227afaa67213efcdce6a870ef5086200f1076438)
+
+diff --git a/sysdeps/x86_64/multiarch/memrchr-avx2.S b/sysdeps/x86_64/multiarch/memrchr-avx2.S
+index 5f8e0be18cfe4fad..edd8180ba1ede9a5 100644
+--- a/sysdeps/x86_64/multiarch/memrchr-avx2.S
++++ b/sysdeps/x86_64/multiarch/memrchr-avx2.S
+@@ -35,7 +35,7 @@
+ # define VEC_SIZE 32
+ # define PAGE_SIZE 4096
+ 	.section SECTION(.text), "ax", @progbits
+-ENTRY(MEMRCHR)
++ENTRY_P2ALIGN(MEMRCHR, 6)
+ # ifdef __ILP32__
+ 	/* Clear upper bits.  */
+ 	and	%RDX_LP, %RDX_LP
diff --git a/SOURCES/glibc-upstream-2.34-296.patch b/SOURCES/glibc-upstream-2.34-296.patch
new file mode 100644
index 0000000..eecd005
--- /dev/null
+++ b/SOURCES/glibc-upstream-2.34-296.patch
@@ -0,0 +1,56 @@
+commit aadd0a1c7c89d016e1186c81c0efcafa36bf84fc
+Author: Noah Goldstein
+Date:   Fri Jun 24 09:42:15 2022 -0700
+
+    x86: Put wcs{n}len-sse4.1 in the sse4.1 text section
+
+    Previously was missing but the two implementations shouldn't get in
+    the sse2 (generic) text section.
+
+    (cherry picked from commit afc6e4328ff80973bde50d5401691b4c4b2e522c)
+
+diff --git a/sysdeps/x86_64/multiarch/strlen-vec.S b/sysdeps/x86_64/multiarch/strlen-vec.S
+index 031753a91763b351..762f4755020c35f9 100644
+--- a/sysdeps/x86_64/multiarch/strlen-vec.S
++++ b/sysdeps/x86_64/multiarch/strlen-vec.S
+@@ -28,6 +28,10 @@
+ #  define SHIFT_RETURN
+ #endif
+ 
++#ifndef SECTION
++# define SECTION(p)	p
++#endif
++
+ /* Long lived register in strlen(s), strnlen(s, n) are:
+ 
+ 	%xmm3 - zero
+@@ -37,7 +41,7 @@
+ */
+ 
+ 
+-.text
++	.section SECTION(.text),"ax",@progbits
+ ENTRY(strlen)
+ 
+ /* Test 64 bytes from %rax for zero. Save result as bitmask in %rdx. 
*/ +diff --git a/sysdeps/x86_64/multiarch/wcslen-sse4_1.S b/sysdeps/x86_64/multiarch/wcslen-sse4_1.S +index 7e62621afc729492..e306a77f51e650d1 100644 +--- a/sysdeps/x86_64/multiarch/wcslen-sse4_1.S ++++ b/sysdeps/x86_64/multiarch/wcslen-sse4_1.S +@@ -1,4 +1,5 @@ + #define AS_WCSLEN + #define strlen __wcslen_sse4_1 ++#define SECTION(p) p##.sse4.1 + + #include "strlen-vec.S" +diff --git a/sysdeps/x86_64/multiarch/wcsnlen-sse4_1.S b/sysdeps/x86_64/multiarch/wcsnlen-sse4_1.S +index 5fa51fe07cbbdf5c..d2f7dd6e2254736c 100644 +--- a/sysdeps/x86_64/multiarch/wcsnlen-sse4_1.S ++++ b/sysdeps/x86_64/multiarch/wcsnlen-sse4_1.S +@@ -1,5 +1,6 @@ + #define AS_WCSLEN + #define AS_STRNLEN + #define strlen __wcsnlen_sse4_1 ++#define SECTION(p) p##.sse4.1 + + #include "strlen-vec.S" diff --git a/SOURCES/glibc-upstream-2.34-297.patch b/SOURCES/glibc-upstream-2.34-297.patch new file mode 100644 index 0000000..1bc2f3c --- /dev/null +++ b/SOURCES/glibc-upstream-2.34-297.patch @@ -0,0 +1,25 @@ +commit f4598f0351559f1a4176d7ce0154423d98bcfb0d +Author: Noah Goldstein +Date: Wed Jun 29 16:07:04 2022 -0700 + + x86: Add definition for __wmemset_chk AVX2 RTM in ifunc impl list + + This was simply missing and meant we weren't testing it properly. + + (cherry picked from commit 2a1099020cdc1e4c9c928156aa85c8cf9d540291) + +diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c +index 043821278fdb6d8f..8d649e263eb24b8a 100644 +--- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c ++++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c +@@ -1032,6 +1032,10 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, + IFUNC_IMPL_ADD (array, i, __wmemset_chk, + CPU_FEATURE_USABLE (AVX2), + __wmemset_chk_avx2_unaligned) ++ IFUNC_IMPL_ADD (array, i, __wmemset_chk, ++ (CPU_FEATURE_USABLE (AVX2) ++ && CPU_FEATURE_USABLE (RTM)), ++ __wmemset_chk_avx2_unaligned_rtm) + IFUNC_IMPL_ADD (array, i, __wmemset_chk, + (CPU_FEATURE_USABLE (AVX512VL) + && CPU_FEATURE_USABLE (AVX512BW) diff --git a/SOURCES/glibc-upstream-2.34-298.patch b/SOURCES/glibc-upstream-2.34-298.patch new file mode 100644 index 0000000..ae07d14 --- /dev/null +++ b/SOURCES/glibc-upstream-2.34-298.patch @@ -0,0 +1,124 @@ +commit 7079931c51547854323fe2ed6fdccf2a1b8b04d7 +Author: Noah Goldstein +Date: Wed Jun 29 16:07:05 2022 -0700 + + x86: Move and slightly improve memset_erms + + Implementation wise: + 1. Remove the VZEROUPPER as memset_{impl}_unaligned_erms does not + use the L(stosb) label that was previously defined. + + 2. Don't give the hotpath (fallthrough) to zero size. + + Code positioning wise: + + Move memset_{chk}_erms to its own file. Leaving it in between the + memset_{impl}_unaligned both adds unnecessary complexity to the + file and wastes space in a relatively hot cache section. 
+ + (cherry picked from commit 4a3f29e7e475dd4e7cce2a24c187e6fb7b5b0a05) + +diff --git a/sysdeps/x86_64/multiarch/Makefile b/sysdeps/x86_64/multiarch/Makefile +index 0e39e63ef6be6a86..da9f16286a763556 100644 +--- a/sysdeps/x86_64/multiarch/Makefile ++++ b/sysdeps/x86_64/multiarch/Makefile +@@ -29,6 +29,7 @@ sysdep_routines += \ + memset-avx2-unaligned-erms-rtm \ + memset-avx512-no-vzeroupper \ + memset-avx512-unaligned-erms \ ++ memset-erms \ + memset-evex-unaligned-erms \ + memset-sse2-unaligned-erms \ + rawmemchr-avx2 \ +diff --git a/sysdeps/x86_64/multiarch/memset-erms.S b/sysdeps/x86_64/multiarch/memset-erms.S +new file mode 100644 +index 0000000000000000..e83cccc731f0a7ea +--- /dev/null ++++ b/sysdeps/x86_64/multiarch/memset-erms.S +@@ -0,0 +1,44 @@ ++/* memset implement with rep stosb ++ Copyright (C) 2022 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++ ++#include ++ ++#if defined USE_MULTIARCH && IS_IN (libc) ++ .text ++ENTRY (__memset_chk_erms) ++ cmp %RDX_LP, %RCX_LP ++ jb HIDDEN_JUMPTARGET (__chk_fail) ++END (__memset_chk_erms) ++ ++/* Only used to measure performance of REP STOSB. */ ++ENTRY (__memset_erms) ++ /* Skip zero length. */ ++ test %RDX_LP, %RDX_LP ++ jz L(stosb_return_zero) ++ mov %RDX_LP, %RCX_LP ++ movzbl %sil, %eax ++ mov %RDI_LP, %RDX_LP ++ rep stosb ++ mov %RDX_LP, %RAX_LP ++ ret ++L(stosb_return_zero): ++ movq %rdi, %rax ++ ret ++END (__memset_erms) ++#endif +diff --git a/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S b/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S +index abc12d9cda1b3843..905d0fa4643d5768 100644 +--- a/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S ++++ b/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S +@@ -156,37 +156,6 @@ L(entry_from_wmemset): + #if defined USE_MULTIARCH && IS_IN (libc) + END (MEMSET_SYMBOL (__memset, unaligned)) + +-# if VEC_SIZE == 16 +-ENTRY (__memset_chk_erms) +- cmp %RDX_LP, %RCX_LP +- jb HIDDEN_JUMPTARGET (__chk_fail) +-END (__memset_chk_erms) +- +-/* Only used to measure performance of REP STOSB. */ +-ENTRY (__memset_erms) +- /* Skip zero length. */ +- test %RDX_LP, %RDX_LP +- jnz L(stosb) +- movq %rdi, %rax +- ret +-# else +-/* Provide a hidden symbol to debugger. 
*/ +- .hidden MEMSET_SYMBOL (__memset, erms) +-ENTRY (MEMSET_SYMBOL (__memset, erms)) +-# endif +-L(stosb): +- mov %RDX_LP, %RCX_LP +- movzbl %sil, %eax +- mov %RDI_LP, %RDX_LP +- rep stosb +- mov %RDX_LP, %RAX_LP +- VZEROUPPER_RETURN +-# if VEC_SIZE == 16 +-END (__memset_erms) +-# else +-END (MEMSET_SYMBOL (__memset, erms)) +-# endif +- + # if defined SHARED && IS_IN (libc) + ENTRY_CHK (MEMSET_CHK_SYMBOL (__memset_chk, unaligned_erms)) + cmp %RDX_LP, %RCX_LP diff --git a/SOURCES/glibc-upstream-2.34-299.patch b/SOURCES/glibc-upstream-2.34-299.patch new file mode 100644 index 0000000..afa05e9 --- /dev/null +++ b/SOURCES/glibc-upstream-2.34-299.patch @@ -0,0 +1,163 @@ +commit 35f9c72c8bd7bc30deb412e966e2f548241b15d2 +Author: Noah Goldstein +Date: Wed Jun 29 16:07:15 2022 -0700 + + x86: Move mem{p}{mov|cpy}_{chk_}erms to its own file + + The primary memmove_{impl}_unaligned_erms implementations don't + interact with this function. Putting them in same file both + wastes space and unnecessarily bloats a hot code section. + + (cherry picked from commit 21925f64730d52eb7d8b2fb62b412f8ab92b0caf) + +diff --git a/sysdeps/x86_64/multiarch/Makefile b/sysdeps/x86_64/multiarch/Makefile +index da9f16286a763556..b9ea5b60c2be1b0a 100644 +--- a/sysdeps/x86_64/multiarch/Makefile ++++ b/sysdeps/x86_64/multiarch/Makefile +@@ -17,6 +17,7 @@ sysdep_routines += \ + memmove-avx-unaligned-erms-rtm \ + memmove-avx512-no-vzeroupper \ + memmove-avx512-unaligned-erms \ ++ memmove-erms \ + memmove-evex-unaligned-erms \ + memmove-sse2-unaligned-erms \ + memmove-ssse3 \ +diff --git a/sysdeps/x86_64/multiarch/memmove-erms.S b/sysdeps/x86_64/multiarch/memmove-erms.S +new file mode 100644 +index 0000000000000000..2d3a6ccb76d77052 +--- /dev/null ++++ b/sysdeps/x86_64/multiarch/memmove-erms.S +@@ -0,0 +1,72 @@ ++/* memcpy/mempcpy/memmove implement with rep movsb ++ Copyright (C) 2022 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++ ++#include ++ ++#if defined USE_MULTIARCH && IS_IN (libc) ++ .text ++ENTRY (__mempcpy_chk_erms) ++ cmp %RDX_LP, %RCX_LP ++ jb HIDDEN_JUMPTARGET (__chk_fail) ++END (__mempcpy_chk_erms) ++ ++/* Only used to measure performance of REP MOVSB. */ ++ENTRY (__mempcpy_erms) ++ mov %RDI_LP, %RAX_LP ++ /* Skip zero length. */ ++ test %RDX_LP, %RDX_LP ++ jz 2f ++ add %RDX_LP, %RAX_LP ++ jmp L(start_movsb) ++END (__mempcpy_erms) ++ ++ENTRY (__memmove_chk_erms) ++ cmp %RDX_LP, %RCX_LP ++ jb HIDDEN_JUMPTARGET (__chk_fail) ++END (__memmove_chk_erms) ++ ++ENTRY (__memmove_erms) ++ movq %rdi, %rax ++ /* Skip zero length. */ ++ test %RDX_LP, %RDX_LP ++ jz 2f ++L(start_movsb): ++ mov %RDX_LP, %RCX_LP ++ cmp %RSI_LP, %RDI_LP ++ jb 1f ++ /* Source == destination is less common. 
*/ ++ je 2f ++ lea (%rsi,%rcx), %RDX_LP ++ cmp %RDX_LP, %RDI_LP ++ jb L(movsb_backward) ++1: ++ rep movsb ++2: ++ ret ++L(movsb_backward): ++ leaq -1(%rdi,%rcx), %rdi ++ leaq -1(%rsi,%rcx), %rsi ++ std ++ rep movsb ++ cld ++ ret ++END (__memmove_erms) ++strong_alias (__memmove_erms, __memcpy_erms) ++strong_alias (__memmove_chk_erms, __memcpy_chk_erms) ++#endif +diff --git a/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S b/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S +index 618d46d8ce28828c..93c7e6883a254434 100644 +--- a/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S ++++ b/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S +@@ -239,56 +239,6 @@ L(start): + #endif + #if defined USE_MULTIARCH && IS_IN (libc) + END (MEMMOVE_SYMBOL (__memmove, unaligned)) +-# if VEC_SIZE == 16 +-ENTRY (__mempcpy_chk_erms) +- cmp %RDX_LP, %RCX_LP +- jb HIDDEN_JUMPTARGET (__chk_fail) +-END (__mempcpy_chk_erms) +- +-/* Only used to measure performance of REP MOVSB. */ +-ENTRY (__mempcpy_erms) +- mov %RDI_LP, %RAX_LP +- /* Skip zero length. */ +- test %RDX_LP, %RDX_LP +- jz 2f +- add %RDX_LP, %RAX_LP +- jmp L(start_movsb) +-END (__mempcpy_erms) +- +-ENTRY (__memmove_chk_erms) +- cmp %RDX_LP, %RCX_LP +- jb HIDDEN_JUMPTARGET (__chk_fail) +-END (__memmove_chk_erms) +- +-ENTRY (__memmove_erms) +- movq %rdi, %rax +- /* Skip zero length. */ +- test %RDX_LP, %RDX_LP +- jz 2f +-L(start_movsb): +- mov %RDX_LP, %RCX_LP +- cmp %RSI_LP, %RDI_LP +- jb 1f +- /* Source == destination is less common. */ +- je 2f +- lea (%rsi,%rcx), %RDX_LP +- cmp %RDX_LP, %RDI_LP +- jb L(movsb_backward) +-1: +- rep movsb +-2: +- ret +-L(movsb_backward): +- leaq -1(%rdi,%rcx), %rdi +- leaq -1(%rsi,%rcx), %rsi +- std +- rep movsb +- cld +- ret +-END (__memmove_erms) +-strong_alias (__memmove_erms, __memcpy_erms) +-strong_alias (__memmove_chk_erms, __memcpy_chk_erms) +-# endif + + # ifdef SHARED + ENTRY (MEMMOVE_CHK_SYMBOL (__mempcpy_chk, unaligned_erms)) diff --git a/SOURCES/glibc-upstream-2.34-300.patch b/SOURCES/glibc-upstream-2.34-300.patch new file mode 100644 index 0000000..5db8327 --- /dev/null +++ b/SOURCES/glibc-upstream-2.34-300.patch @@ -0,0 +1,38 @@ +commit ccc54bd61c768b6a27f9305a0831b76a7b6d706f +Author: Noah Goldstein +Date: Wed Jun 29 18:56:18 2022 -0700 + + x86: Add missing IS_IN (libc) check to strncmp-sse4_2.S + + Was missing to for the multiarch build rtld-strncmp-sse4_2.os was + being built and exporting symbols: + + build/glibc/string/rtld-strncmp-sse4_2.os: + 0000000000000000 T __strncmp_sse42 + + Introduced in: + + commit 11ffcacb64a939c10cfc713746b8ec88837f5c4a + Author: H.J. Lu + Date: Wed Jun 21 12:10:50 2017 -0700 + + x86-64: Implement strcmp family IFUNC selectors in C + + (cherry picked from commit 96ac447d915ea5ecef3f9168cc13f4e731349a3b) + +diff --git a/sysdeps/x86_64/multiarch/strncmp-sse4_2.S b/sysdeps/x86_64/multiarch/strncmp-sse4_2.S +index 22f51a0dfd2770c9..85dc363bf9d6273d 100644 +--- a/sysdeps/x86_64/multiarch/strncmp-sse4_2.S ++++ b/sysdeps/x86_64/multiarch/strncmp-sse4_2.S +@@ -16,6 +16,8 @@ + License along with the GNU C Library; if not, see + . 
*/ + +-#define STRCMP_SSE42 __strncmp_sse42 +-#define USE_AS_STRNCMP +-#include "strcmp-sse42.S" ++#if IS_IN (libc) ++# define STRCMP_SSE42 __strncmp_sse42 ++# define USE_AS_STRNCMP ++# include "strcmp-sse42.S" ++#endif diff --git a/SOURCES/glibc-upstream-2.34-301.patch b/SOURCES/glibc-upstream-2.34-301.patch new file mode 100644 index 0000000..4d8b2fb --- /dev/null +++ b/SOURCES/glibc-upstream-2.34-301.patch @@ -0,0 +1,28 @@ +commit b991af50632a2da262b00b2375d9120f23b25525 +Author: Joseph Myers +Date: Wed May 25 14:37:28 2022 +0000 + + Update syscall-names.list for Linux 5.18 + + Linux 5.18 has no new syscalls. Update the version number in + syscall-names.list to reflect that it is still current for 5.18. + + Tested with build-many-glibcs.py. + + (cherry picked from commit 3d9926663cba19f40d26d8a8ab3b2a7cc09ffb13) + +diff --git a/sysdeps/unix/sysv/linux/syscall-names.list b/sysdeps/unix/sysv/linux/syscall-names.list +index e2743c649586d97a..95370e2ec5dbc4a7 100644 +--- a/sysdeps/unix/sysv/linux/syscall-names.list ++++ b/sysdeps/unix/sysv/linux/syscall-names.list +@@ -21,8 +21,8 @@ + # This file can list all potential system calls. The names are only + # used if the installed kernel headers also provide them. + +-# The list of system calls is current as of Linux 5.17. +-kernel 5.17 ++# The list of system calls is current as of Linux 5.18. ++kernel 5.18 + + FAST_atomic_update + FAST_cmpxchg diff --git a/SOURCES/glibc-upstream-2.34-302.patch b/SOURCES/glibc-upstream-2.34-302.patch new file mode 100644 index 0000000..3c37bca --- /dev/null +++ b/SOURCES/glibc-upstream-2.34-302.patch @@ -0,0 +1,44 @@ +commit b2f32e746492615a6eb3e66fac1e766e32e8deb1 +Author: Florian Weimer +Date: Thu Jul 21 12:12:08 2022 +0200 + + malloc: Simplify implementation of __malloc_assert + + It is prudent not to run too much code after detecting heap + corruption, and __fxprintf is really complex. The line number + and file name do not carry much information, so it is not included + in the error message. (__libc_message only supports %s formatting.) + The function name and assertion should provide some context. + + Reviewed-by: Siddhesh Poyarekar + (cherry picked from commit ac8047cdf326504f652f7db97ec96c0e0cee052f) + +diff --git a/malloc/malloc.c b/malloc/malloc.c +index 7882c70f0a0312d1..d31e985ecce968fe 100644 +--- a/malloc/malloc.c ++++ b/malloc/malloc.c +@@ -294,19 +294,14 @@ + # define __assert_fail(assertion, file, line, function) \ + __malloc_assert(assertion, file, line, function) + +-extern const char *__progname; +- +-static void ++_Noreturn static void + __malloc_assert (const char *assertion, const char *file, unsigned int line, + const char *function) + { +- (void) __fxprintf (NULL, "%s%s%s:%u: %s%sAssertion `%s' failed.\n", +- __progname, __progname[0] ? ": " : "", +- file, line, +- function ? function : "", function ? ": " : "", +- assertion); +- fflush (stderr); +- abort (); ++ __libc_message (do_abort, "\ ++Fatal glibc error: malloc assertion failure in %s: %s\n", ++ function, assertion); ++ __builtin_unreachable (); + } + #endif + #endif diff --git a/SPECS/glibc.spec b/SPECS/glibc.spec index e8c2cfc..dcfb89f 100644 --- a/SPECS/glibc.spec +++ b/SPECS/glibc.spec @@ -148,7 +148,7 @@ end \ Summary: The GNU libc libraries Name: glibc Version: %{glibcversion} -Release: 28%{?dist}.2 +Release: 40%{?dist} # In general, GPLv2+ is used by programs, LGPLv2+ is used for # libraries. @@ -215,7 +215,6 @@ rpm.define("__debug_install_post bash " .. 
wrapper ############################################################################## Patch1: glibc-fedora-nscd.patch Patch4: glibc-fedora-linux-tcsetattr.patch -Patch6: glibc-fedora-localedef.patch Patch8: glibc-fedora-manual-dircategory.patch Patch9: glibc-rh827510.patch Patch13: glibc-fedora-localedata-rh61908.patch @@ -378,11 +377,208 @@ Patch173: glibc-upstream-2.34-107.patch Patch174: glibc-rh2058224-1.patch Patch175: glibc-rh2058224-2.patch Patch176: glibc-rh2058230.patch -Patch177: glibc-rh2115828-1.patch -Patch178: glibc-rh2115828-2.patch -Patch179: glibc-rh2115828-3.patch -Patch180: glibc-rh2115828-4.patch -Patch181: glibc-rh2095450.patch +Patch177: glibc-rh2054789.patch +Patch178: glibc-upstream-2.34-108.patch +# glibc-2.34-109-gd64b08d5ba only changes NEWS. +Patch179: glibc-upstream-2.34-110.patch +Patch180: glibc-upstream-2.34-111.patch +Patch181: glibc-upstream-2.34-112.patch +Patch182: glibc-upstream-2.34-113.patch +Patch183: glibc-upstream-2.34-114.patch +# glibc-2.34-115-gd5d1c95aaf only changes NEWS. +# glibc-2.34-116-g852361b5a3 is glibc-rh2054789.patch. +Patch184: glibc-upstream-2.34-117.patch +Patch185: glibc-upstream-2.34-118.patch +Patch186: glibc-upstream-2.34-119.patch +Patch187: glibc-upstream-2.34-120.patch +Patch188: glibc-upstream-2.34-121.patch +Patch189: glibc-upstream-2.34-122.patch +Patch190: glibc-upstream-2.34-123.patch +Patch191: glibc-upstream-2.34-124.patch +Patch192: glibc-upstream-2.34-125.patch +Patch193: glibc-upstream-2.34-126.patch +Patch194: glibc-upstream-2.34-127.patch +Patch195: glibc-upstream-2.34-128.patch +Patch196: glibc-upstream-2.34-129.patch +Patch197: glibc-upstream-2.34-130.patch +Patch198: glibc-upstream-2.34-131.patch +Patch199: glibc-upstream-2.34-132.patch +Patch200: glibc-upstream-2.34-133.patch +Patch201: glibc-upstream-2.34-134.patch +Patch202: glibc-upstream-2.34-135.patch +Patch203: glibc-upstream-2.34-136.patch +Patch204: glibc-upstream-2.34-137.patch +Patch205: glibc-upstream-2.34-138.patch +Patch206: glibc-upstream-2.34-139.patch +Patch207: glibc-upstream-2.34-140.patch +Patch208: glibc-upstream-2.34-141.patch +Patch209: glibc-upstream-2.34-142.patch +Patch210: glibc-upstream-2.34-143.patch +Patch211: glibc-upstream-2.34-144.patch +Patch212: glibc-upstream-2.34-145.patch +Patch213: glibc-upstream-2.34-146.patch +Patch214: glibc-upstream-2.34-147.patch +Patch215: glibc-upstream-2.34-148.patch +Patch216: glibc-upstream-2.34-149.patch +Patch217: glibc-upstream-2.34-150.patch +Patch218: glibc-upstream-2.34-151.patch +Patch219: glibc-upstream-2.34-152.patch +Patch220: glibc-upstream-2.34-153.patch +Patch221: glibc-upstream-2.34-154.patch +Patch222: glibc-upstream-2.34-155.patch +Patch223: glibc-upstream-2.34-156.patch +Patch224: glibc-upstream-2.34-157.patch +Patch225: glibc-upstream-2.34-158.patch +Patch226: glibc-upstream-2.34-159.patch +Patch227: glibc-upstream-2.34-160.patch +# glibc-2.34-161-gceed89d089 only changes NEWS. 
+Patch228: glibc-upstream-2.34-162.patch +Patch229: glibc-upstream-2.34-163.patch +Patch230: glibc-upstream-2.34-164.patch +Patch231: glibc-upstream-2.34-165.patch +Patch232: glibc-upstream-2.34-166.patch +Patch233: glibc-upstream-2.34-167.patch +Patch234: glibc-upstream-2.34-168.patch +Patch235: glibc-upstream-2.34-169.patch +Patch236: glibc-upstream-2.34-170.patch +Patch237: glibc-upstream-2.34-171.patch +Patch238: glibc-upstream-2.34-172.patch +Patch239: glibc-upstream-2.34-173.patch +Patch240: glibc-upstream-2.34-174.patch +Patch241: glibc-upstream-2.34-175.patch +Patch242: glibc-upstream-2.34-176.patch +Patch243: glibc-upstream-2.34-177.patch +Patch244: glibc-upstream-2.34-178.patch +Patch245: glibc-upstream-2.34-179.patch +Patch246: glibc-upstream-2.34-180.patch +Patch247: glibc-upstream-2.34-181.patch +Patch248: glibc-upstream-2.34-182.patch +Patch249: glibc-upstream-2.34-183.patch +Patch250: glibc-upstream-2.34-184.patch +Patch251: glibc-upstream-2.34-185.patch +Patch252: glibc-upstream-2.34-186.patch +Patch253: glibc-upstream-2.34-187.patch +Patch254: glibc-upstream-2.34-188.patch +Patch255: glibc-upstream-2.34-189.patch +Patch256: glibc-upstream-2.34-190.patch +Patch257: glibc-upstream-2.34-191.patch +Patch258: glibc-upstream-2.34-192.patch +Patch259: glibc-upstream-2.34-193.patch +Patch260: glibc-upstream-2.34-194.patch +Patch261: glibc-upstream-2.34-195.patch +Patch262: glibc-upstream-2.34-196.patch +Patch263: glibc-upstream-2.34-197.patch +Patch264: glibc-upstream-2.34-198.patch +Patch265: glibc-upstream-2.34-199.patch +Patch266: glibc-upstream-2.34-200.patch +Patch267: glibc-upstream-2.34-201.patch +Patch268: glibc-upstream-2.34-202.patch +Patch269: glibc-upstream-2.34-203.patch +Patch270: glibc-upstream-2.34-204.patch +Patch271: glibc-upstream-2.34-205.patch +Patch272: glibc-upstream-2.34-206.patch +Patch273: glibc-upstream-2.34-207.patch +Patch274: glibc-upstream-2.34-208.patch +Patch275: glibc-upstream-2.34-209.patch +Patch276: glibc-upstream-2.34-210.patch +Patch277: glibc-upstream-2.34-211.patch +Patch278: glibc-upstream-2.34-212.patch +Patch279: glibc-upstream-2.34-213.patch +Patch280: glibc-upstream-2.34-214.patch +Patch281: glibc-upstream-2.34-215.patch +Patch282: glibc-upstream-2.34-216.patch +Patch283: glibc-upstream-2.34-217.patch +Patch284: glibc-upstream-2.34-218.patch +Patch285: glibc-upstream-2.34-219.patch +Patch286: glibc-upstream-2.34-220.patch +Patch287: glibc-upstream-2.34-221.patch +Patch288: glibc-upstream-2.34-222.patch +Patch289: glibc-upstream-2.34-223.patch +Patch290: glibc-upstream-2.34-224.patch +Patch291: glibc-upstream-2.34-225.patch +Patch292: glibc-upstream-2.34-226.patch +Patch293: glibc-upstream-2.34-227.patch +Patch294: glibc-upstream-2.34-228.patch +Patch295: glibc-upstream-2.34-229.patch +Patch296: glibc-upstream-2.34-230.patch +Patch297: glibc-upstream-2.34-231.patch +Patch298: glibc-upstream-2.34-232.patch +Patch299: glibc-upstream-2.34-233.patch +Patch300: glibc-upstream-2.34-234.patch +Patch301: glibc-upstream-2.34-235.patch +Patch302: glibc-upstream-2.34-236.patch +Patch303: glibc-upstream-2.34-237.patch +Patch304: glibc-upstream-2.34-238.patch +Patch305: glibc-upstream-2.34-239.patch +Patch306: glibc-upstream-2.34-240.patch +Patch307: glibc-upstream-2.34-241.patch +Patch308: glibc-upstream-2.34-242.patch +Patch309: glibc-upstream-2.34-243.patch +Patch310: glibc-upstream-2.34-244.patch +Patch311: glibc-upstream-2.34-245.patch +Patch312: glibc-upstream-2.34-246.patch +Patch313: glibc-upstream-2.34-247.patch +Patch314: 
glibc-upstream-2.34-248.patch +Patch315: glibc-upstream-2.34-249.patch +Patch316: glibc-upstream-2.34-250.patch +Patch317: glibc-upstream-2.34-251.patch +Patch318: glibc-upstream-2.34-252.patch +Patch319: glibc-upstream-2.34-253.patch +Patch320: glibc-upstream-2.34-254.patch +Patch321: glibc-upstream-2.34-255.patch +Patch322: glibc-upstream-2.34-256.patch +Patch323: glibc-upstream-2.34-257.patch +Patch324: glibc-upstream-2.34-258.patch +Patch325: glibc-upstream-2.34-259.patch +Patch326: glibc-upstream-2.34-260.patch +Patch327: glibc-upstream-2.34-261.patch +Patch328: glibc-upstream-2.34-262.patch +Patch329: glibc-upstream-2.34-263.patch +Patch330: glibc-upstream-2.34-264.patch +Patch331: glibc-upstream-2.34-265.patch +Patch332: glibc-upstream-2.34-266.patch +Patch333: glibc-upstream-2.34-267.patch +Patch334: glibc-upstream-2.34-268.patch +Patch335: glibc-rh2085529-1.patch +Patch336: glibc-rh2085529-2.patch +Patch337: glibc-rh2085529-3.patch +Patch338: glibc-rh2085529-4.patch +Patch339: glibc-upstream-2.34-269.patch +Patch340: glibc-upstream-2.34-270.patch +Patch341: glibc-upstream-2.34-271.patch +Patch342: glibc-upstream-2.34-272.patch +Patch343: glibc-upstream-2.34-273.patch +Patch344: glibc-rh2096191-1.patch +Patch345: glibc-rh2096191-2.patch +Patch346: glibc-upstream-2.34-274.patch +Patch347: glibc-upstream-2.34-275.patch +Patch348: glibc-upstream-2.34-276.patch +Patch349: glibc-upstream-2.34-277.patch +Patch350: glibc-upstream-2.34-278.patch +Patch351: glibc-upstream-2.34-279.patch +Patch352: glibc-upstream-2.34-280.patch +Patch353: glibc-upstream-2.34-281.patch +Patch354: glibc-upstream-2.34-282.patch +Patch355: glibc-upstream-2.34-283.patch +Patch356: glibc-upstream-2.34-284.patch +Patch357: glibc-upstream-2.34-285.patch +Patch358: glibc-upstream-2.34-286.patch +Patch359: glibc-upstream-2.34-287.patch +Patch360: glibc-upstream-2.34-288.patch +Patch361: glibc-upstream-2.34-289.patch +Patch362: glibc-upstream-2.34-290.patch +Patch363: glibc-upstream-2.34-291.patch +Patch364: glibc-upstream-2.34-292.patch +Patch365: glibc-upstream-2.34-293.patch +Patch366: glibc-upstream-2.34-294.patch +Patch367: glibc-upstream-2.34-295.patch +Patch368: glibc-upstream-2.34-296.patch +Patch369: glibc-upstream-2.34-297.patch +Patch370: glibc-upstream-2.34-298.patch +Patch371: glibc-upstream-2.34-299.patch +Patch372: glibc-upstream-2.34-300.patch +Patch373: glibc-upstream-2.34-301.patch +Patch374: glibc-upstream-2.34-302.patch ############################################################################## # Continued list of core "glibc" package information: @@ -1417,11 +1613,11 @@ done pushd build-%{target} %make_build install_root=%{glibc_sysroot} install %make_build install_root=%{glibc_sysroot} \ - install-locales -C ../localedata objdir=`pwd` + install-locale-files -C ../localedata objdir=`pwd` popd -# Locale creation may produce different groups of hardlinks in an -# unpredictable manner. Re-grouping makes those differences go away. -hardlink %{glibc_sysroot}/usr/lib/locale +# Locale creation via install-locale-files does not group identical files +# via hardlinks, so we must group them ourselves. +hardlink -c %{glibc_sysroot}/usr/lib/locale # install_different: # Install all core libraries into DESTDIR/SUBDIR. Either the file is @@ -1543,6 +1739,19 @@ $olddir/build-%{target}/elf/ld.so \ # glibc-all-langpacks versions) ln locale-archive locale-archive.real +# Almost half the LC_CTYPE files in langpacks are identical to the C.utf8 +# variant which is installed by default. 
When we keep them as hardlinks, +# each langpack ends up retaining a copy. If we convert these to symbolic +# links instead, we save ~350K each when they get installed that way. +# +# LC_MEASUREMENT and LC_PAPER also have several duplicates but we don't +# bother with these because they are only ~30 bytes each. +pushd %{glibc_sysroot}/usr/lib/locale +for f in $(find eo *_* -samefile C.utf8/LC_CTYPE); do + rm $f && ln -s '../C.utf8/LC_CTYPE' $f +done +popd + # Create the file lists for the language specific sub-packages: for i in eo *_* do @@ -2426,14 +2635,252 @@ fi %files -f compat-libpthread-nonshared.filelist -n compat-libpthread-nonshared %changelog -* Thu Aug 11 2022 Florian Weimer - 2.34-28.2 -- ppc64le: Fix VSX register number in POWER9 strncpy (#2095450) +* Fri Jul 22 2022 Arjun Shankar - 2.34-40 +- Sync with upstream branch release/2.34/master, + commit b2f32e746492615a6eb3e66fac1e766e32e8deb1: +- malloc: Simplify implementation of __malloc_assert +- Update syscall-names.list for Linux 5.18 +- x86: Add missing IS_IN (libc) check to strncmp-sse4_2.S +- x86: Move mem{p}{mov|cpy}_{chk_}erms to its own file +- x86: Move and slightly improve memset_erms +- x86: Add definition for __wmemset_chk AVX2 RTM in ifunc impl list +- x86: Put wcs{n}len-sse4.1 in the sse4.1 text section +- x86: Align entry for memrchr to 64-bytes. +- x86: Add BMI1/BMI2 checks for ISA_V3 check +- x86: Cleanup bounds checking in large memcpy case +- x86: Add bounds `x86_non_temporal_threshold` +- x86: Add sse42 implementation to strcmp's ifunc +- x86: Fix misordered logic for setting `rep_movsb_stop_threshold` +- x86: Align varshift table to 32-bytes +- x86: ZERO_UPPER_VEC_REGISTERS_RETURN_XTEST expect no transactions +- x86: Shrink code size of memchr-evex.S +- x86: Shrink code size of memchr-avx2.S +- x86: Optimize memrchr-avx2.S +- x86: Optimize memrchr-evex.S +- x86: Optimize memrchr-sse2.S +- x86: Add COND_VZEROUPPER that can replace vzeroupper if no `ret` +- x86: Create header for VEC classes in x86 strings library +- x86_64: Add strstr function with 512-bit EVEX +- x86-64: Ignore r_addend for R_X86_64_GLOB_DAT/R_X86_64_JUMP_SLOT +- x86_64: Implement evex512 version of strlen, strnlen, wcslen and wcsnlen +- x86_64: Remove bzero optimization +- x86_64: Remove end of line trailing spaces +- nptl: Fix ___pthread_unregister_cancel_restore asynchronous restore +- linux: Fix mq_timereceive check for 32 bit fallback code (BZ 29304) -* Fri Aug 5 2022 Florian Weimer - 2.34-28.1 -- Backport __rseq_* symbols from glibc 2.35 (#2115828) +* Fri Jun 24 2022 Florian Weimer - 2.34-39 +- Add the no-aaaa DNS stub resolver option (#2096191) -* Tue Mar 15 2022 Florian Weimer - 2.34-28 -- Trim changelog (#2063247) +* Tue Jun 14 2022 Arjun Shankar - 2.34-38 +- Sync with upstream branch release/2.34/master, + commit 94ab2088c37d8e4285354af120b7ed6b887b9e53: +- nss: handle stat failure in check_reload_and_get (BZ #28752) +- nss: add assert to DB_LOOKUP_FCT (BZ #28752) +- nios2: Remove _dl_skip_args usage (BZ# 29187) +- hppa: Remove _dl_skip_args usage (BZ# 29165) +- nptl: Fix __libc_cleanup_pop_restore asynchronous restore (BZ#29214) + +* Wed Jun 8 2022 Florian Weimer - 2.34-37 +- Enable rseq by default and add GLIBC_2.35 rseq symbols (#2085529) + +* Wed Jun 8 2022 Florian Weimer - 2.34-36 +- Sync with upstream branch release/2.34/master, + commit 4c92a1041257c0155c6aa7a182fe5f78e477b0e6: +- powerpc: Fix VSX register number on __strncpy_power9 [BZ #29197] +- socket: Fix mistyped define statement in socket/sys/socket.h (BZ #29225) +- 
iconv: Use 64 bit stat for gconv_parseconfdir (BZ# 29213) +- catgets: Use 64 bit stat for __open_catalog (BZ# 29211) +- inet: Use 64 bit stat for ruserpass (BZ# 29210) +- socket: Use 64 bit stat for isfdtype (BZ# 29209) +- posix: Use 64 bit stat for fpathconf (_PC_ASYNC_IO) (BZ# 29208) +- posix: Use 64 bit stat for posix_fallocate fallback (BZ# 29207) +- misc: Use 64 bit stat for getusershell (BZ# 29204) +- misc: Use 64 bit stat for daemon (BZ# 29203) + +* Tue May 31 2022 Arjun Shankar - 2.34-35 +- Sync with upstream branch release/2.34/master, + commit ff450cdbdee0b8cb6b9d653d6d2fa892de29be31: +- Fix deadlock when pthread_atfork handler calls pthread_atfork or dlclose +- x86: Fallback {str|wcs}cmp RTM in the ncmp overflow case [BZ #29127] +- string.h: fix __fortified_attr_access macro call [BZ #29162] +- linux: Add a getauxval test [BZ #23293] +- rtld: Use generic argv adjustment in ld.so [BZ #23293] +- S390: Enable static PIE + +* Thu May 19 2022 Florian Weimer - 2.34-34 +- Sync with upstream branch release/2.34/master, + commit ede8d94d154157d269b18f3601440ac576c1f96a: +- csu: Implement and use _dl_early_allocate during static startup +- Linux: Introduce __brk_call for invoking the brk system call +- Linux: Implement a useful version of _startup_fatal +- ia64: Always define IA64_USE_NEW_STUB as a flag macro +- Linux: Define MMAP_CALL_INTERNAL +- i386: Honor I386_USE_SYSENTER for 6-argument Linux system calls +- i386: Remove OPTIMIZE_FOR_GCC_5 from Linux libc-do-syscall.S +- elf: Remove __libc_init_secure +- Linux: Consolidate auxiliary vector parsing (redo) +- Linux: Include in dl-sysdep.c only for SHARED +- Revert "Linux: Consolidate auxiliary vector parsing" +- Linux: Consolidate auxiliary vector parsing +- Linux: Assume that NEED_DL_SYSINFO_DSO is always defined +- Linux: Remove DL_FIND_ARG_COMPONENTS +- Linux: Remove HAVE_AUX_SECURE, HAVE_AUX_XID, HAVE_AUX_PAGESIZE +- elf: Merge dl-sysdep.c into the Linux version +- elf: Remove unused NEED_DL_BASE_ADDR and _dl_base_addr +- x86: Optimize {str|wcs}rchr-evex +- x86: Optimize {str|wcs}rchr-avx2 +- x86: Optimize {str|wcs}rchr-sse2 +- x86: Cleanup page cross code in memcmp-avx2-movbe.S +- x86: Remove memcmp-sse4.S +- x86: Small improvements for wcslen +- x86: Remove AVX str{n}casecmp +- x86: Add EVEX optimized str{n}casecmp +- x86: Add AVX2 optimized str{n}casecmp +- x86: Optimize str{n}casecmp TOLOWER logic in strcmp-sse42.S +- x86: Optimize str{n}casecmp TOLOWER logic in strcmp.S +- x86: Remove strspn-sse2.S and use the generic implementation +- x86: Remove strpbrk-sse2.S and use the generic implementation +- x86: Remove strcspn-sse2.S and use the generic implementation +- x86: Optimize strspn in strspn-c.c +- x86: Optimize strcspn and strpbrk in strcspn-c.c +- x86: Code cleanup in strchr-evex and comment justifying branch +- x86: Code cleanup in strchr-avx2 and comment justifying branch +- x86_64: Remove bcopy optimizations +- x86-64: Remove bzero weak alias in SS2 memset +- x86_64/multiarch: Sort sysdep_routines and put one entry per line +- x86: Improve L to support L(XXX_SYMBOL (YYY, ZZZ)) +- fortify: Ensure that __glibc_fortify condition is a constant [BZ #29141] + +* Thu May 12 2022 Florian Weimer - 2.34-33 +- Sync with upstream branch release/2.34/master, + commit 91c2e6c3db44297bf4cb3a2e3c40236c5b6a0b23: +- dlfcn: Implement the RTLD_DI_PHDR request type for dlinfo +- manual: Document the dlinfo function +- x86: Fix fallback for wcsncmp_avx2 in strcmp-avx2.S [BZ #28896] +- x86: Fix bug in strncmp-evex and strncmp-avx2 [BZ #28895] 
+- x86: Set .text section in memset-vec-unaligned-erms
+- x86-64: Optimize bzero
+- x86: Remove SSSE3 instruction for broadcast in memset.S (SSE2 Only)
+- x86: Improve vec generation in memset-vec-unaligned-erms.S
+- x86-64: Fix strcmp-evex.S
+- x86-64: Fix strcmp-avx2.S
+- x86: Optimize strcmp-evex.S
+- x86: Optimize strcmp-avx2.S
+- manual: Clarify that abbreviations of long options are allowed
+- Add HWCAP2_AFP, HWCAP2_RPRES from Linux 5.17 to AArch64 bits/hwcap.h
+- aarch64: Add HWCAP2_ECV from Linux 5.16
+- Add SOL_MPTCP, SOL_MCTP from Linux 5.16 to bits/socket.h
+- Update kernel version to 5.17 in tst-mman-consts.py
+- Update kernel version to 5.16 in tst-mman-consts.py
+- Update syscall lists for Linux 5.17
+- Add ARPHRD_CAN, ARPHRD_MCTP to net/if_arp.h
+- Update kernel version to 5.15 in tst-mman-consts.py
+- Add PF_MCTP, AF_MCTP from Linux 5.15 to bits/socket.h
+
+* Thu Apr 28 2022 Carlos O'Donell <carlos@redhat.com> - 2.34-32
+- Sync with upstream branch release/2.34/master,
+  commit c66c92181ddbd82306537a608e8c0282587131de:
+- posix/glob.c: update from gnulib (BZ#25659)
+- linux: Fix fchmodat with AT_SYMLINK_NOFOLLOW for 64 bit time_t (BZ#29097)
+
+* Wed Apr 27 2022 Carlos O'Donell <carlos@redhat.com> - 2.34-31
+- Sync with upstream branch release/2.34/master,
+  commit 55640ed3fde48360a8e8083be4843bd2dc7cecfe:
+- i386: Regenerate ulps
+- linux: Fix missing internal 64 bit time_t stat usage
+- x86: Optimize L(less_vec) case in memcmp-evex-movbe.S
+- x86: Don't set Prefer_No_AVX512 for processors with AVX512 and AVX-VNNI
+- x86-64: Use notl in EVEX strcmp [BZ #28646]
+- x86: Shrink memcmp-sse4.S code size
+- x86: Double size of ERMS rep_movsb_threshold in dl-cacheinfo.h
+- x86: Optimize memmove-vec-unaligned-erms.S
+- x86-64: Replace movzx with movzbl
+- x86-64: Remove Prefer_AVX2_STRCMP
+- x86-64: Improve EVEX strcmp with masked load
+- x86: Replace sse2 instructions with avx in memcmp-evex-movbe.S
+- x86: Optimize memset-vec-unaligned-erms.S
+- x86: Optimize memcmp-evex-movbe.S for frontend behavior and size
+- x86: Modify ENTRY in sysdep.h so that p2align can be specified
+- x86-64: Optimize load of all bits set into ZMM register [BZ #28252]
+- scripts/glibcelf.py: Mark as UNSUPPORTED on Python 3.5 and earlier
+- dlfcn: Do not use rtld_active () to determine ld.so state (bug 29078)
+- INSTALL: Rephrase --with-default-link documentation
+- misc: Fix rare fortify crash on wchar funcs. [BZ 29030]
+- Default to --with-default-link=no (bug 25812)
+- scripts: Add glibcelf.py module
+
+* Thu Apr 21 2022 Carlos O'Donell <carlos@redhat.com> - 2.34-30
+- Sync with upstream branch release/2.34/master,
+  commit 71326f1f2fd09dafb9c34404765fb88129e94237:
+- nptl: Fix pthread_cancel cancelhandling atomic operations
+- mips: Fix mips64n32 64 bit time_t stat support (BZ#29069)
+- hurd: Fix arbitrary error code
+- nptl: Handle spurious EINTR when thread cancellation is disabled (BZ#29029)
+- S390: Add new s390 platform z16.
+- NEWS: Update fixed bug list for LD_AUDIT backports.
+- hppa: Fix bind-now audit (BZ #28857)
+- elf: Replace tst-audit24bmod2.so with tst-audit24bmod2
+- Fix elf/tst-audit25a with default bind now toolchains
+- elf: Fix runtime linker auditing on aarch64 (BZ #26643)
+- elf: Issue la_symbind for bind-now (BZ #23734)
+- elf: Fix initial-exec TLS access on audit modules (BZ #28096)
+- elf: Add la_activity during application exit
+- elf: Do not fail for failed dlmopen on audit modules (BZ #28061)
+- elf: Issue audit la_objopen for vDSO
+- elf: Add audit tests for modules with TLSDESC
+- elf: Avoid unnecessary slowdown from profiling with audit (BZ#15533)
+- elf: Add _dl_audit_pltexit
+- elf: Add _dl_audit_pltenter
+- elf: Add _dl_audit_preinit
+- elf: Add _dl_audit_symbind_alt and _dl_audit_symbind
+- elf: Add _dl_audit_objclose
+- elf: Add _dl_audit_objsearch
+- elf: Add _dl_audit_activity_map and _dl_audit_activity_nsid
+- elf: Add _dl_audit_objopen
+- elf: Move la_activity (LA_ACT_ADD) after _dl_add_to_namespace_list() (BZ #28062)
+- elf: Move LAV_CURRENT to link_lavcurrent.h
+- elf: Fix elf_get_dynamic_info() for bootstrap
+- elf: Fix dynamic-link.h usage on rtld.c
+- elf: Fix elf_get_dynamic_info definition
+- elf: Avoid nested functions in the loader [BZ #27220]
+- powerpc: Delete unneeded ELF_MACHINE_BEFORE_RTLD_RELOC
+- hppa: Use END instead of PSEUDO_END in swapcontext.S
+- hppa: Implement swapcontext in assembler (bug 28960)
+
+* Tue Mar 15 2022 Florian Weimer <fweimer@redhat.com> - 2.34-29
+- Sync with upstream branch release/2.34/master,
+  commit 224d8c1890b6c57c7e4e8ddbb792dd9552086704:
+- debug: Synchronize feature guards in fortified functions [BZ #28746]
+- debug: Autogenerate _FORTIFY_SOURCE tests
+- Enable _FORTIFY_SOURCE=3 for gcc 12 and above
+- fortify: Fix spurious warning with realpath
+- __glibc_unsafe_len: Fix comment
+- debug: Add tests for _FORTIFY_SOURCE=3
+- Make sure that the fortified function conditionals are constant
+- Don't add access size hints to fortifiable functions
+- nss: Protect against errno changes in function lookup (bug 28953)
+- nss: Do not mention NSS test modules in <gnu/lib-names.h>
+- io: Add fsync call in tst-stat
+- hppa: Fix warnings from _dl_lookup_address
+- nptl: Fix cleanups for stack grows up [BZ# 28899]
+- hppa: Revise gettext trampoline design
+- hppa: Fix swapcontext
+- Fix elf/tst-audit2 on hppa
+- localedef: Handle symbolic links when generating locale-archive
+- NEWS: Add a bug fix entry for BZ #28896
+- x86: Fix TEST_NAME to make it a string in tst-strncmp-rtm.c
+- x86: Test wcscmp RTM in the wcsncmp overflow case [BZ #28896]
+- x86: Fallback {str|wcs}cmp RTM in the ncmp overflow case [BZ #28896]
+- string: Add a testcase for wcsncmp with SIZE_MAX [BZ #28755]
+- linux: fix accuracy of get_nprocs and get_nprocs_conf [BZ #28865]
+- Add reference to BZ#28860 on NEWS
+- linux: Fix missing __convert_scm_timestamps (BZ #28860)
+
+* Tue Mar 08 2022 Arjun Shankar <arjun@redhat.com> - 2.34-28
+- Reduce installed size of some langpacks by de-duplicating LC_CTYPE (#2054789)
+- Fix localedef so it can handle symbolic links when generating locale-archive.
+- Drop glibc-fedora-localedef.patch and adjust locale installation
+  accordingly so that installed content remains unchanged.
+
* Mon Feb 28 2022 Florian Weimer <fweimer@redhat.com> - 2.34-27
- Fix regression (ldd crash) during dependency sorting in ld.so (#2058230)