diff --git a/src/amd/vulkan/radv_query.c b/src/amd/vulkan/radv_query.c index d49bc0f0564..90512d4f276 100644 --- a/src/amd/vulkan/radv_query.c +++ b/src/amd/vulkan/radv_query.c @@ -1679,13 +1679,14 @@ static void emit_begin_query(struct radv_cmd_buffer *cmd_buffer, va += 8 * idx; - si_cs_emit_write_event_eop(cs, - cmd_buffer->device->physical_device->rad_info.chip_class, - radv_cmd_buffer_uses_mec(cmd_buffer), - V_028A90_PS_DONE, 0, - EOP_DST_SEL_TC_L2, - EOP_DATA_SEL_GDS, - va, EOP_DATA_GDS(0, 1), 0); + radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0)); + radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_GDS) | + COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) | + COPY_DATA_WR_CONFIRM); + radeon_emit(cs, 0); + radeon_emit(cs, 0); + radeon_emit(cs, va); + radeon_emit(cs, va >> 32); /* Record that the command buffer needs GDS. */ cmd_buffer->gds_needed = true; @@ -1769,13 +1770,14 @@ static void emit_end_query(struct radv_cmd_buffer *cmd_buffer, va += 8 * idx; - si_cs_emit_write_event_eop(cs, - cmd_buffer->device->physical_device->rad_info.chip_class, - radv_cmd_buffer_uses_mec(cmd_buffer), - V_028A90_PS_DONE, 0, - EOP_DST_SEL_TC_L2, - EOP_DATA_SEL_GDS, - va, EOP_DATA_GDS(0, 1), 0); + radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0)); + radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_GDS) | + COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) | + COPY_DATA_WR_CONFIRM); + radeon_emit(cs, 0); + radeon_emit(cs, 0); + radeon_emit(cs, va); + radeon_emit(cs, va >> 32); cmd_buffer->state.active_pipeline_gds_queries--; } diff --git a/src/amd/vulkan/radv_shader.h b/src/amd/vulkan/radv_shader.h index 9d9491d4361..2eb3ba4e64e 100644 --- a/src/amd/vulkan/radv_shader.h +++ b/src/amd/vulkan/radv_shader.h @@ -573,9 +573,11 @@ get_tcs_num_patches(unsigned tcs_num_input_vertices, if (chip_class >= GFX7 && family != CHIP_STONEY) hardware_lds_size = 65536; - num_patches = MIN2(num_patches, hardware_lds_size / (input_patch_size + output_patch_size)); + if (input_patch_size + output_patch_size) + num_patches = MIN2(num_patches, hardware_lds_size / (input_patch_size + output_patch_size)); /* Make sure the output data fits in the offchip buffer */ - num_patches = MIN2(num_patches, (tess_offchip_block_dw_size * 4) / output_patch_size); + if (output_patch_size) + num_patches = MIN2(num_patches, (tess_offchip_block_dw_size * 4) / output_patch_size); /* Not necessary for correctness, but improves performance. The * specific value is taken from the proprietary driver. */ diff --git a/src/gallium/auxiliary/cso_cache/cso_context.c b/src/gallium/auxiliary/cso_cache/cso_context.c index 1eef6aac70c..a6a663d97a6 100644 --- a/src/gallium/auxiliary/cso_cache/cso_context.c +++ b/src/gallium/auxiliary/cso_cache/cso_context.c @@ -402,10 +402,13 @@ void cso_destroy_context( struct cso_context *ctx ) PIPE_SHADER_CAP_MAX_SHADER_BUFFERS); int maxcb = scr->get_shader_param(scr, sh, PIPE_SHADER_CAP_MAX_CONST_BUFFERS); + int maximg = scr->get_shader_param(scr, sh, + PIPE_SHADER_CAP_MAX_SHADER_IMAGES); assert(maxsam <= PIPE_MAX_SAMPLERS); assert(maxview <= PIPE_MAX_SHADER_SAMPLER_VIEWS); assert(maxssbo <= PIPE_MAX_SHADER_BUFFERS); assert(maxcb <= PIPE_MAX_CONSTANT_BUFFERS); + assert(maximg <= PIPE_MAX_SHADER_IMAGES); if (maxsam > 0) { ctx->pipe->bind_sampler_states(ctx->pipe, sh, 0, maxsam, zeros); } @@ -415,6 +418,9 @@ void cso_destroy_context( struct cso_context *ctx ) if (maxssbo > 0) { ctx->pipe->set_shader_buffers(ctx->pipe, sh, 0, maxssbo, ssbos, 0); } + if (maximg > 0) { + ctx->pipe->set_shader_images(ctx->pipe, sh, 0, maximg, NULL); + } for (int i = 0; i < maxcb; i++) { ctx->pipe->set_constant_buffer(ctx->pipe, sh, i, NULL); } diff --git a/src/gallium/drivers/iris/iris_program.c b/src/gallium/drivers/iris/iris_program.c index 8157e921850..971fc80b5ac 100644 --- a/src/gallium/drivers/iris/iris_program.c +++ b/src/gallium/drivers/iris/iris_program.c @@ -2109,8 +2109,8 @@ iris_get_scratch_space(struct iris_context *ice, * in the base configuration. */ unsigned subslice_total = screen->subslice_total; - if (devinfo->gen >= 12) - subslice_total = devinfo->num_subslices[0]; + if (devinfo->gen == 12) + subslice_total = (devinfo->is_dg1 || devinfo->gt == 2 ? 6 : 2); else if (devinfo->gen == 11) subslice_total = 8; else if (devinfo->gen < 11) diff --git a/src/gallium/drivers/iris/iris_resolve.c b/src/gallium/drivers/iris/iris_resolve.c index 276ad62b1dd..045f43ed8c0 100644 --- a/src/gallium/drivers/iris/iris_resolve.c +++ b/src/gallium/drivers/iris/iris_resolve.c @@ -793,7 +793,9 @@ iris_resource_set_aux_state(struct iris_context *ice, if (res->aux.state[level][start_layer + a] != aux_state) { res->aux.state[level][start_layer + a] = aux_state; /* XXX: Need to track which bindings to make dirty */ - ice->state.dirty |= IRIS_DIRTY_RENDER_BUFFER; + ice->state.dirty |= IRIS_DIRTY_RENDER_BUFFER | + IRIS_DIRTY_RENDER_RESOLVES_AND_FLUSHES | + IRIS_DIRTY_COMPUTE_RESOLVES_AND_FLUSHES; ice->state.stage_dirty |= IRIS_ALL_STAGE_DIRTY_BINDINGS; } } diff --git a/src/gallium/drivers/iris/iris_resource.c b/src/gallium/drivers/iris/iris_resource.c index 8747ef4aa8a..3b34e32cd21 100644 --- a/src/gallium/drivers/iris/iris_resource.c +++ b/src/gallium/drivers/iris/iris_resource.c @@ -1125,6 +1125,20 @@ iris_flush_resource(struct pipe_context *ctx, struct pipe_resource *resource) 0, INTEL_REMAINING_LAYERS, mod ? mod->aux_usage : ISL_AUX_USAGE_NONE, mod ? mod->supports_clear_color : false); + + if (!res->mod_info && res->aux.usage != ISL_AUX_USAGE_NONE) { + /* flush_resource may be used to prepare an image for sharing external + * to the driver (e.g. via eglCreateImage). To account for this, make + * sure to get rid of any compression that a consumer wouldn't know how + * to handle. + */ + for (int i = 0; i < IRIS_BATCH_COUNT; i++) { + if (iris_batch_references(&ice->batches[i], res->bo)) + iris_batch_flush(&ice->batches[i]); + } + + iris_resource_disable_aux(res); + } } static void diff --git a/src/gallium/drivers/iris/iris_state.c b/src/gallium/drivers/iris/iris_state.c index 59a63f7bbab..b9ddb863a16 100644 --- a/src/gallium/drivers/iris/iris_state.c +++ b/src/gallium/drivers/iris/iris_state.c @@ -1666,6 +1666,8 @@ struct iris_rasterizer_state { bool multisample; bool force_persample_interp; bool conservative_rasterization; + bool fill_mode_point; + bool fill_mode_line; bool fill_mode_point_or_line; enum pipe_sprite_coord_mode sprite_coord_mode; /* PIPE_SPRITE_* */ uint16_t sprite_coord_enable; @@ -1729,11 +1731,15 @@ iris_create_rasterizer_state(struct pipe_context *ctx, cso->conservative_rasterization = state->conservative_raster_mode == PIPE_CONSERVATIVE_RASTER_POST_SNAP; - cso->fill_mode_point_or_line = - state->fill_front == PIPE_POLYGON_MODE_LINE || + cso->fill_mode_point = state->fill_front == PIPE_POLYGON_MODE_POINT || - state->fill_back == PIPE_POLYGON_MODE_LINE || state->fill_back == PIPE_POLYGON_MODE_POINT; + cso->fill_mode_line = + state->fill_front == PIPE_POLYGON_MODE_LINE || + state->fill_back == PIPE_POLYGON_MODE_LINE; + cso->fill_mode_point_or_line = + cso->fill_mode_point || + cso->fill_mode_line; if (state->clip_plane_enable != 0) cso->num_clip_plane_consts = util_logbase2(state->clip_plane_enable) + 1; @@ -4059,6 +4065,28 @@ iris_emit_sbe_swiz(struct iris_batch *batch, } } +static bool +iris_is_drawing_points(const struct iris_context *ice) +{ + const struct iris_rasterizer_state *cso_rast = ice->state.cso_rast; + + if (cso_rast->fill_mode_point) { + return true; + } + + if (ice->shaders.prog[MESA_SHADER_GEOMETRY]) { + const struct brw_gs_prog_data *gs_prog_data = + (void *) ice->shaders.prog[MESA_SHADER_GEOMETRY]->prog_data; + return gs_prog_data->output_topology == _3DPRIM_POINTLIST; + } else if (ice->shaders.prog[MESA_SHADER_TESS_EVAL]) { + const struct brw_tes_prog_data *tes_data = + (void *) ice->shaders.prog[MESA_SHADER_TESS_EVAL]->prog_data; + return tes_data->output_topology == BRW_TESS_OUTPUT_TOPOLOGY_POINT; + } else { + return ice->state.prim_mode == PIPE_PRIM_POINTS; + } +} + static unsigned iris_calculate_point_sprite_overrides(const struct brw_wm_prog_data *prog_data, const struct iris_rasterizer_state *cso) @@ -4093,7 +4121,8 @@ iris_emit_sbe(struct iris_batch *batch, const struct iris_context *ice) &urb_read_offset, &urb_read_length); unsigned sprite_coord_overrides = - iris_calculate_point_sprite_overrides(wm_prog_data, cso_rast); + iris_is_drawing_points(ice) ? + iris_calculate_point_sprite_overrides(wm_prog_data, cso_rast) : 0; iris_emit_cmd(batch, GENX(3DSTATE_SBE), sbe) { sbe.AttributeSwizzleEnable = true; diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c index 8f688fa3650..ef35f86b05f 100644 --- a/src/gallium/drivers/radeonsi/si_descriptors.c +++ b/src/gallium/drivers/radeonsi/si_descriptors.c @@ -1482,11 +1482,12 @@ void si_update_needs_color_decompress_masks(struct si_context *sctx) /* Reset descriptors of buffer resources after \p buf has been invalidated. * If buf == NULL, reset all descriptors. */ -static void si_reset_buffer_resources(struct si_context *sctx, struct si_buffer_resources *buffers, +static bool si_reset_buffer_resources(struct si_context *sctx, struct si_buffer_resources *buffers, unsigned descriptors_idx, uint64_t slot_mask, struct pipe_resource *buf, enum radeon_bo_priority priority) { struct si_descriptors *descs = &sctx->descriptors[descriptors_idx]; + bool noop = true; uint64_t mask = buffers->enabled_mask & slot_mask; while (mask) { @@ -1501,8 +1502,10 @@ static void si_reset_buffer_resources(struct si_context *sctx, struct si_buffer_ sctx, si_resource(buffer), buffers->writable_mask & (1llu << i) ? RADEON_USAGE_READWRITE : RADEON_USAGE_READ, priority, true); + noop = false; } } + return !noop; } /* Update all buffer bindings where the buffer is bound, including @@ -1577,11 +1580,15 @@ void si_rebind_buffer(struct si_context *sctx, struct pipe_resource *buf) } if (!buffer || buffer->bind_history & PIPE_BIND_SHADER_BUFFER) { - for (shader = 0; shader < SI_NUM_SHADERS; shader++) - si_reset_buffer_resources(sctx, &sctx->const_and_shader_buffers[shader], - si_const_and_shader_buffer_descriptors_idx(shader), - u_bit_consecutive64(0, SI_NUM_SHADER_BUFFERS), buf, - sctx->const_and_shader_buffers[shader].priority); + for (shader = 0; shader < SI_NUM_SHADERS; shader++) { + if (si_reset_buffer_resources(sctx, &sctx->const_and_shader_buffers[shader], + si_const_and_shader_buffer_descriptors_idx(shader), + u_bit_consecutive64(0, SI_NUM_SHADER_BUFFERS), buf, + sctx->const_and_shader_buffers[shader].priority) && + shader == PIPE_SHADER_COMPUTE) { + sctx->compute_shaderbuf_sgprs_dirty = true; + } + } } if (!buffer || buffer->bind_history & PIPE_BIND_SAMPLER_VIEW) { @@ -1633,6 +1640,9 @@ void si_rebind_buffer(struct si_context *sctx, struct pipe_resource *buf) radeon_add_to_gfx_buffer_list_check_mem(sctx, si_resource(buffer), RADEON_USAGE_READWRITE, RADEON_PRIO_SAMPLER_BUFFER, true); + + if (shader == PIPE_SHADER_COMPUTE) + sctx->compute_image_sgprs_dirty = true; } } } diff --git a/src/gallium/frontends/dri/dri_helpers.c b/src/gallium/frontends/dri/dri_helpers.c index 01a1fb3d96c..5e87df35a55 100644 --- a/src/gallium/frontends/dri/dri_helpers.c +++ b/src/gallium/frontends/dri/dri_helpers.c @@ -258,7 +258,9 @@ dri2_create_image_from_renderbuffer2(__DRIcontext *context, int renderbuffer, void *loaderPrivate, unsigned *error) { - struct gl_context *ctx = ((struct st_context *)dri_context(context)->st)->ctx; + struct st_context *st_ctx = (struct st_context *)dri_context(context)->st; + struct gl_context *ctx = st_ctx->ctx; + struct pipe_context *p_ctx = st_ctx->pipe; struct gl_renderbuffer *rb; struct pipe_resource *tex; __DRIimage *img; @@ -299,6 +301,13 @@ dri2_create_image_from_renderbuffer2(__DRIcontext *context, pipe_resource_reference(&img->texture, tex); + /* If the resource supports EGL_MESA_image_dma_buf_export, make sure that + * it's in a shareable state. Do this now while we still have the access to + * the context. + */ + if (dri2_get_mapping_by_format(img->dri_format)) + p_ctx->flush_resource(p_ctx, tex); + *error = __DRI_IMAGE_ERROR_SUCCESS; return img; } @@ -326,7 +335,9 @@ dri2_create_from_texture(__DRIcontext *context, int target, unsigned texture, void *loaderPrivate) { __DRIimage *img; - struct gl_context *ctx = ((struct st_context *)dri_context(context)->st)->ctx; + struct st_context *st_ctx = (struct st_context *)dri_context(context)->st; + struct gl_context *ctx = st_ctx->ctx; + struct pipe_context *p_ctx = st_ctx->pipe; struct gl_texture_object *obj; struct pipe_resource *tex; GLuint face = 0; @@ -376,6 +387,13 @@ dri2_create_from_texture(__DRIcontext *context, int target, unsigned texture, pipe_resource_reference(&img->texture, tex); + /* If the resource supports EGL_MESA_image_dma_buf_export, make sure that + * it's in a shareable state. Do this now while we still have the access to + * the context. + */ + if (dri2_get_mapping_by_format(img->dri_format)) + p_ctx->flush_resource(p_ctx, tex); + *error = __DRI_IMAGE_ERROR_SUCCESS; return img; } @@ -547,6 +565,9 @@ dri2_get_mapping_by_fourcc(int fourcc) const struct dri2_format_mapping * dri2_get_mapping_by_format(int format) { + if (format == __DRI_IMAGE_FORMAT_NONE) + return NULL; + for (unsigned i = 0; i < ARRAY_SIZE(dri2_format_table); i++) { if (dri2_format_table[i].dri_format == format) return &dri2_format_table[i]; diff --git a/src/gallium/frontends/lavapipe/lvp_device.c b/src/gallium/frontends/lavapipe/lvp_device.c index 45734f95880..187aecde1f8 100644 --- a/src/gallium/frontends/lavapipe/lvp_device.c +++ b/src/gallium/frontends/lavapipe/lvp_device.c @@ -52,8 +52,6 @@ lvp_physical_device_init(struct lvp_physical_device *device, if (!device->pscreen) return vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY); - fprintf(stderr, "WARNING: lavapipe is not a conformant vulkan implementation, testing use only.\n"); - device->max_images = device->pscreen->get_shader_param(device->pscreen, PIPE_SHADER_FRAGMENT, PIPE_SHADER_CAP_MAX_SHADER_IMAGES); lvp_physical_device_get_supported_extensions(device, &device->supported_extensions); result = lvp_init_wsi(device); @@ -575,6 +573,19 @@ void lvp_GetPhysicalDeviceProperties2( } } +static void lvp_get_physical_device_queue_family_properties( + VkQueueFamilyProperties* pQueueFamilyProperties) +{ + *pQueueFamilyProperties = (VkQueueFamilyProperties) { + .queueFlags = VK_QUEUE_GRAPHICS_BIT | + VK_QUEUE_COMPUTE_BIT | + VK_QUEUE_TRANSFER_BIT, + .queueCount = 1, + .timestampValidBits = 64, + .minImageTransferGranularity = (VkExtent3D) { 1, 1, 1 }, + }; +} + void lvp_GetPhysicalDeviceQueueFamilyProperties( VkPhysicalDevice physicalDevice, uint32_t* pCount, @@ -586,15 +597,21 @@ void lvp_GetPhysicalDeviceQueueFamilyProperties( } assert(*pCount >= 1); + lvp_get_physical_device_queue_family_properties(pQueueFamilyProperties); +} - *pQueueFamilyProperties = (VkQueueFamilyProperties) { - .queueFlags = VK_QUEUE_GRAPHICS_BIT | - VK_QUEUE_COMPUTE_BIT | - VK_QUEUE_TRANSFER_BIT, - .queueCount = 1, - .timestampValidBits = 64, - .minImageTransferGranularity = (VkExtent3D) { 1, 1, 1 }, - }; +void lvp_GetPhysicalDeviceQueueFamilyProperties2( + VkPhysicalDevice physicalDevice, + uint32_t* pCount, + VkQueueFamilyProperties2 *pQueueFamilyProperties) +{ + if (pQueueFamilyProperties == NULL) { + *pCount = 1; + return; + } + + assert(*pCount >= 1); + lvp_get_physical_device_queue_family_properties(&pQueueFamilyProperties->queueFamilyProperties); } void lvp_GetPhysicalDeviceMemoryProperties( @@ -617,6 +634,14 @@ void lvp_GetPhysicalDeviceMemoryProperties( }; } +void lvp_GetPhysicalDeviceMemoryProperties2( + VkPhysicalDevice physicalDevice, + VkPhysicalDeviceMemoryProperties2 *pMemoryProperties) +{ + lvp_GetPhysicalDeviceMemoryProperties(physicalDevice, + &pMemoryProperties->memoryProperties); +} + PFN_vkVoidFunction lvp_GetInstanceProcAddr( VkInstance _instance, const char* pName) @@ -822,6 +847,8 @@ VkResult lvp_CreateDevice( const VkAllocationCallbacks* pAllocator, VkDevice* pDevice) { + fprintf(stderr, "WARNING: lavapipe is not a conformant vulkan implementation, testing use only.\n"); + LVP_FROM_HANDLE(lvp_physical_device, physical_device, physicalDevice); struct lvp_device *device; diff --git a/src/glx/g_glxglvnddispatchfuncs.c b/src/glx/g_glxglvnddispatchfuncs.c index 0f02ed2d321..e0ea27c0b18 100644 --- a/src/glx/g_glxglvnddispatchfuncs.c +++ b/src/glx/g_glxglvnddispatchfuncs.c @@ -87,6 +87,7 @@ const char * const __glXDispatchTableStrings[DI_LAST_INDEX] = { __ATTRIB(SelectEventSGIX), // glXSwapBuffers implemented by libglvnd __ATTRIB(SwapBuffersMscOML), + __ATTRIB(SwapIntervalEXT), __ATTRIB(SwapIntervalMESA), __ATTRIB(SwapIntervalSGI), // glXUseXFont implemented by libglvnd @@ -893,6 +894,24 @@ static int dispatch_SwapIntervalMESA(unsigned int interval) +static void dispatch_SwapIntervalEXT(Display *dpy, GLXDrawable drawable, int interval) +{ + PFNGLXSWAPINTERVALEXTPROC pSwapIntervalEXT; + __GLXvendorInfo *dd; + + dd = GetDispatchFromDrawable(dpy, drawable); + if (dd == NULL) + return; + + __FETCH_FUNCTION_PTR(SwapIntervalEXT); + if (pSwapIntervalEXT == NULL) + return; + + pSwapIntervalEXT(dpy, drawable, interval); +} + + + static Bool dispatch_WaitForMscOML(Display *dpy, GLXDrawable drawable, int64_t target_msc, int64_t divisor, int64_t remainder, int64_t *ust, @@ -974,6 +993,7 @@ const void * const __glXDispatchFunctions[DI_LAST_INDEX + 1] = { __ATTRIB(ReleaseTexImageEXT), __ATTRIB(SelectEventSGIX), __ATTRIB(SwapBuffersMscOML), + __ATTRIB(SwapIntervalEXT), __ATTRIB(SwapIntervalMESA), __ATTRIB(SwapIntervalSGI), __ATTRIB(WaitForMscOML), diff --git a/src/glx/g_glxglvnddispatchindices.h b/src/glx/g_glxglvnddispatchindices.h index 3ba50a74abb..b65d078098f 100644 --- a/src/glx/g_glxglvnddispatchindices.h +++ b/src/glx/g_glxglvnddispatchindices.h @@ -79,6 +79,7 @@ typedef enum __GLXdispatchIndex { DI_SelectEventSGIX, // SwapBuffers implemented by libglvnd DI_SwapBuffersMscOML, + DI_SwapIntervalEXT, DI_SwapIntervalMESA, DI_SwapIntervalSGI, // UseXFont implemented by libglvnd diff --git a/src/intel/common/gen_mi_builder.h b/src/intel/common/gen_mi_builder.h index ddd8459ef07..47fb98e99f7 100644 --- a/src/intel/common/gen_mi_builder.h +++ b/src/intel/common/gen_mi_builder.h @@ -932,6 +932,13 @@ gen_mi_store_address(struct gen_mi_builder *b, static inline void gen_mi_self_mod_barrier(struct gen_mi_builder *b) { + /* First make sure all the memory writes from previous modifying commands + * have landed. We want to do this before going through the CS cache, + * otherwise we could be fetching memory that hasn't been written to yet. + */ + gen_mi_builder_emit(b, GENX(PIPE_CONTROL), pc) { + pc.CommandStreamerStallEnable = true; + } /* Documentation says Gen11+ should be able to invalidate the command cache * but experiment show it doesn't work properly, so for now just get over * the CS prefetch. diff --git a/src/intel/compiler/brw_fs_copy_propagation.cpp b/src/intel/compiler/brw_fs_copy_propagation.cpp index 917c3abfe9e..6896987055f 100644 --- a/src/intel/compiler/brw_fs_copy_propagation.cpp +++ b/src/intel/compiler/brw_fs_copy_propagation.cpp @@ -437,6 +437,7 @@ instruction_requires_packed_data(fs_inst *inst) case FS_OPCODE_DDX_COARSE: case FS_OPCODE_DDY_FINE: case FS_OPCODE_DDY_COARSE: + case SHADER_OPCODE_QUAD_SWIZZLE: return true; default: return false; diff --git a/src/intel/compiler/brw_ir_fs.h b/src/intel/compiler/brw_ir_fs.h index 6ba3a6ca97e..3a4acc1834a 100644 --- a/src/intel/compiler/brw_ir_fs.h +++ b/src/intel/compiler/brw_ir_fs.h @@ -451,13 +451,15 @@ regs_written(const fs_inst *inst) * Return the number of dataflow registers read by the instruction (either * fully or partially) counted from 'floor(reg_offset(inst->src[i]) / * register_size)'. The somewhat arbitrary register size unit is 4B for the - * UNIFORM and IMM files and 32B for all other files. + * UNIFORM files and 32B for all other files. */ inline unsigned regs_read(const fs_inst *inst, unsigned i) { - const unsigned reg_size = - inst->src[i].file == UNIFORM || inst->src[i].file == IMM ? 4 : REG_SIZE; + if (inst->src[i].file == IMM) + return 1; + + const unsigned reg_size = inst->src[i].file == UNIFORM ? 4 : REG_SIZE; return DIV_ROUND_UP(reg_offset(inst->src[i]) % reg_size + inst->size_read(i) - MIN2(inst->size_read(i), reg_padding(inst->src[i])), diff --git a/src/intel/vulkan/anv_allocator.c b/src/intel/vulkan/anv_allocator.c index 9007cd00e85..48811912e95 100644 --- a/src/intel/vulkan/anv_allocator.c +++ b/src/intel/vulkan/anv_allocator.c @@ -1447,8 +1447,8 @@ anv_scratch_pool_alloc(struct anv_device *device, struct anv_scratch_pool *pool, * For, Gen11+, scratch space allocation is based on the number of threads * in the base configuration. */ - if (devinfo->gen >= 12) - subslices = devinfo->num_subslices[0]; + if (devinfo->gen == 12) + subslices = (devinfo->is_dg1 || devinfo->gt == 2 ? 6 : 2); else if (devinfo->gen == 11) subslices = 8; else if (devinfo->gen >= 9) diff --git a/src/intel/vulkan/anv_image.c b/src/intel/vulkan/anv_image.c index 0290431f145..80307cd612f 100644 --- a/src/intel/vulkan/anv_image.c +++ b/src/intel/vulkan/anv_image.c @@ -684,6 +684,25 @@ choose_drm_format_mod(const struct anv_physical_device *device, return NULL; } +static VkImageUsageFlags +anv_image_create_usage(const VkImageCreateInfo *pCreateInfo, + VkImageUsageFlags usage) +{ + /* Add TRANSFER_SRC usage for multisample attachment images. This is + * because we might internally use the TRANSFER_SRC layout on them for + * blorp operations associated with resolving those into other attachments + * at the end of a subpass. + * + * Without this additional usage, we compute an incorrect AUX state in + * anv_layout_to_aux_state(). + */ + if (pCreateInfo->samples > VK_SAMPLE_COUNT_1_BIT && + (usage & (VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | + VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT))) + usage |= VK_IMAGE_USAGE_TRANSFER_SRC_BIT; + return usage; +} + VkResult anv_image_create(VkDevice _device, const struct anv_image_create_info *create_info, @@ -732,7 +751,7 @@ anv_image_create(VkDevice _device, image->levels = pCreateInfo->mipLevels; image->array_size = pCreateInfo->arrayLayers; image->samples = pCreateInfo->samples; - image->usage = pCreateInfo->usage; + image->usage = anv_image_create_usage(pCreateInfo, pCreateInfo->usage); image->create_flags = pCreateInfo->flags; image->tiling = pCreateInfo->tiling; image->disjoint = pCreateInfo->flags & VK_IMAGE_CREATE_DISJOINT_BIT; @@ -745,8 +764,11 @@ anv_image_create(VkDevice _device, const VkImageStencilUsageCreateInfoEXT *stencil_usage_info = vk_find_struct_const(pCreateInfo->pNext, IMAGE_STENCIL_USAGE_CREATE_INFO_EXT); - if (stencil_usage_info) - image->stencil_usage = stencil_usage_info->stencilUsage; + if (stencil_usage_info) { + image->stencil_usage = + anv_image_create_usage(pCreateInfo, + stencil_usage_info->stencilUsage); + } } /* In case of external format, We don't know format yet, diff --git a/src/intel/vulkan/anv_pass.c b/src/intel/vulkan/anv_pass.c index af23b87969d..1818f6c587b 100644 --- a/src/intel/vulkan/anv_pass.c +++ b/src/intel/vulkan/anv_pass.c @@ -23,6 +23,7 @@ #include "anv_private.h" +#include "vk_format_info.h" #include "vk_util.h" static void @@ -406,6 +407,70 @@ num_subpass_attachments2(const VkSubpassDescription2KHR *desc) (ds_resolve && ds_resolve->pDepthStencilResolveAttachment); } +static bool +vk_image_layout_depth_only(VkImageLayout layout) +{ + switch (layout) { + case VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_OPTIMAL: + case VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_OPTIMAL: + return true; + + default: + return false; + } +} + +/* From the Vulkan Specification 1.2.166 - VkAttachmentReference2: + * + * "If layout only specifies the layout of the depth aspect of the + * attachment, the layout of the stencil aspect is specified by the + * stencilLayout member of a VkAttachmentReferenceStencilLayout structure + * included in the pNext chain. Otherwise, layout describes the layout for + * all relevant image aspects." + */ +static VkImageLayout +stencil_ref_layout(const VkAttachmentReference2KHR *att_ref) +{ + if (!vk_image_layout_depth_only(att_ref->layout)) + return att_ref->layout; + + const VkAttachmentReferenceStencilLayoutKHR *stencil_ref = + vk_find_struct_const(att_ref->pNext, + ATTACHMENT_REFERENCE_STENCIL_LAYOUT_KHR); + if (!stencil_ref) + return VK_IMAGE_LAYOUT_UNDEFINED; + return stencil_ref->stencilLayout; +} + +/* From the Vulkan Specification 1.2.166 - VkAttachmentDescription2: + * + * "If format is a depth/stencil format, and initialLayout only specifies + * the initial layout of the depth aspect of the attachment, the initial + * layout of the stencil aspect is specified by the stencilInitialLayout + * member of a VkAttachmentDescriptionStencilLayout structure included in + * the pNext chain. Otherwise, initialLayout describes the initial layout + * for all relevant image aspects." + */ +static VkImageLayout +stencil_desc_layout(const VkAttachmentDescription2KHR *att_desc, bool final) +{ + if (!vk_format_has_stencil(att_desc->format)) + return VK_IMAGE_LAYOUT_UNDEFINED; + + const VkImageLayout main_layout = + final ? att_desc->finalLayout : att_desc->initialLayout; + if (!vk_image_layout_depth_only(main_layout)) + return main_layout; + + const VkAttachmentDescriptionStencilLayoutKHR *stencil_desc = + vk_find_struct_const(att_desc->pNext, + ATTACHMENT_DESCRIPTION_STENCIL_LAYOUT_KHR); + assert(stencil_desc); + return final ? + stencil_desc->stencilFinalLayout : + stencil_desc->stencilInitialLayout; +} + VkResult anv_CreateRenderPass2( VkDevice _device, const VkRenderPassCreateInfo2KHR* pCreateInfo, @@ -450,10 +515,6 @@ VkResult anv_CreateRenderPass2( pass->subpass_flushes = subpass_flushes; for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) { - const VkAttachmentDescriptionStencilLayoutKHR *stencil_layout = - vk_find_struct_const(pCreateInfo->pAttachments[i].pNext, - ATTACHMENT_DESCRIPTION_STENCIL_LAYOUT_KHR); - pass->attachments[i] = (struct anv_render_pass_attachment) { .format = pCreateInfo->pAttachments[i].format, .samples = pCreateInfo->pAttachments[i].samples, @@ -463,12 +524,10 @@ VkResult anv_CreateRenderPass2( .initial_layout = pCreateInfo->pAttachments[i].initialLayout, .final_layout = pCreateInfo->pAttachments[i].finalLayout, - .stencil_initial_layout = (stencil_layout ? - stencil_layout->stencilInitialLayout : - pCreateInfo->pAttachments[i].initialLayout), - .stencil_final_layout = (stencil_layout ? - stencil_layout->stencilFinalLayout : - pCreateInfo->pAttachments[i].finalLayout), + .stencil_initial_layout = stencil_desc_layout(&pCreateInfo->pAttachments[i], + false), + .stencil_final_layout = stencil_desc_layout(&pCreateInfo->pAttachments[i], + true), }; } @@ -487,17 +546,11 @@ VkResult anv_CreateRenderPass2( subpass_attachments += desc->inputAttachmentCount; for (uint32_t j = 0; j < desc->inputAttachmentCount; j++) { - const VkAttachmentReferenceStencilLayoutKHR *stencil_layout = - vk_find_struct_const(desc->pInputAttachments[j].pNext, - ATTACHMENT_REFERENCE_STENCIL_LAYOUT_KHR); - subpass->input_attachments[j] = (struct anv_subpass_attachment) { .usage = VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT, .attachment = desc->pInputAttachments[j].attachment, .layout = desc->pInputAttachments[j].layout, - .stencil_layout = (stencil_layout ? - stencil_layout->stencilLayout : - desc->pInputAttachments[j].layout), + .stencil_layout = stencil_ref_layout(&desc->pInputAttachments[j]), }; } } @@ -531,17 +584,11 @@ VkResult anv_CreateRenderPass2( if (desc->pDepthStencilAttachment) { subpass->depth_stencil_attachment = subpass_attachments++; - const VkAttachmentReferenceStencilLayoutKHR *stencil_attachment = - vk_find_struct_const(desc->pDepthStencilAttachment->pNext, - ATTACHMENT_REFERENCE_STENCIL_LAYOUT_KHR); - *subpass->depth_stencil_attachment = (struct anv_subpass_attachment) { .usage = VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT, .attachment = desc->pDepthStencilAttachment->attachment, .layout = desc->pDepthStencilAttachment->layout, - .stencil_layout = stencil_attachment ? - stencil_attachment->stencilLayout : - desc->pDepthStencilAttachment->layout, + .stencil_layout = stencil_ref_layout(desc->pDepthStencilAttachment), }; } @@ -552,17 +599,11 @@ VkResult anv_CreateRenderPass2( if (ds_resolve && ds_resolve->pDepthStencilResolveAttachment) { subpass->ds_resolve_attachment = subpass_attachments++; - const VkAttachmentReferenceStencilLayoutKHR *stencil_resolve_attachment = - vk_find_struct_const(ds_resolve->pDepthStencilResolveAttachment->pNext, - ATTACHMENT_REFERENCE_STENCIL_LAYOUT_KHR); - *subpass->ds_resolve_attachment = (struct anv_subpass_attachment) { .usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT, .attachment = ds_resolve->pDepthStencilResolveAttachment->attachment, .layout = ds_resolve->pDepthStencilResolveAttachment->layout, - .stencil_layout = stencil_resolve_attachment ? - stencil_resolve_attachment->stencilLayout : - ds_resolve->pDepthStencilResolveAttachment->layout, + .stencil_layout = stencil_ref_layout(ds_resolve->pDepthStencilResolveAttachment), }; subpass->depth_resolve_mode = ds_resolve->depthResolveMode; subpass->stencil_resolve_mode = ds_resolve->stencilResolveMode; diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c index a9c49e0f592..e3eb376fa5a 100644 --- a/src/intel/vulkan/genX_cmd_buffer.c +++ b/src/intel/vulkan/genX_cmd_buffer.c @@ -462,8 +462,10 @@ anv_image_init_aux_tt(struct anv_cmd_buffer *cmd_buffer, { uint32_t plane = anv_image_aspect_to_plane(image->aspects, aspect); + const struct anv_surface *surface = &image->planes[plane].surface; uint64_t base_address = - anv_address_physical(image->planes[plane].address); + anv_address_physical(anv_address_add(image->planes[plane].address, + surface->offset)); const struct isl_surf *isl_surf = &image->planes[plane].surface.isl; uint64_t format_bits = gen_aux_map_format_bits_for_isl_surf(isl_surf); @@ -1231,6 +1233,17 @@ transition_color_buffer(struct anv_cmd_buffer *cmd_buffer, uint32_t level_layer_count = MIN2(layer_count, aux_layers - base_layer); + /* If will_full_fast_clear is set, the caller promises to + * fast-clear the largest portion of the specified range as it can. + * For color images, that means only the first LOD and array slice. + */ + if (level == 0 && base_layer == 0 && will_full_fast_clear) { + base_layer++; + level_layer_count--; + if (level_layer_count == 0) + continue; + } + anv_image_ccs_op(cmd_buffer, image, image->planes[plane].surface.isl.format, ISL_SWIZZLE_IDENTITY, @@ -1250,6 +1263,12 @@ transition_color_buffer(struct anv_cmd_buffer *cmd_buffer, "define an MCS buffer."); } + /* If will_full_fast_clear is set, the caller promises to fast-clear + * the largest portion of the specified range as it can. + */ + if (will_full_fast_clear) + return; + assert(base_level == 0 && level_count == 1); anv_image_mcs_op(cmd_buffer, image, image->planes[plane].surface.isl.format, diff --git a/src/intel/vulkan/genX_pipeline.c b/src/intel/vulkan/genX_pipeline.c index 205e8677f19..33f071019b7 100644 --- a/src/intel/vulkan/genX_pipeline.c +++ b/src/intel/vulkan/genX_pipeline.c @@ -1180,7 +1180,22 @@ emit_cb_state(struct anv_graphics_pipeline *pipeline, #endif .LogicOpEnable = info->logicOpEnable, .LogicOpFunction = vk_to_gen_logic_op[info->logicOp], - .ColorBufferBlendEnable = a->blendEnable, + /* Vulkan specification 1.2.168, VkLogicOp: + * + * "Logical operations are controlled by the logicOpEnable and + * logicOp members of VkPipelineColorBlendStateCreateInfo. If + * logicOpEnable is VK_TRUE, then a logical operation selected by + * logicOp is applied between each color attachment and the + * fragment’s corresponding output value, and blending of all + * attachments is treated as if it were disabled." + * + * From the Broadwell PRM Volume 2d: Command Reference: Structures: + * BLEND_STATE_ENTRY: + * + * "Enabling LogicOp and Color Buffer Blending at the same time is + * UNDEFINED" + */ + .ColorBufferBlendEnable = !info->logicOpEnable && a->blendEnable, .ColorClampRange = COLORCLAMP_RTFORMAT, .PreBlendColorClampEnable = true, .PostBlendColorClampEnable = true, diff --git a/src/intel/vulkan/vk_format_info.h b/src/intel/vulkan/vk_format_info.h index 006e1f4a6ad..4e72c244742 100644 --- a/src/intel/vulkan/vk_format_info.h +++ b/src/intel/vulkan/vk_format_info.h @@ -164,4 +164,11 @@ vk_format_has_depth(VkFormat format) return aspects & VK_IMAGE_ASPECT_DEPTH_BIT; } +static inline bool +vk_format_has_stencil(VkFormat format) +{ + const VkImageAspectFlags aspects = vk_format_aspects(format); + return aspects & VK_IMAGE_ASPECT_STENCIL_BIT; +} + #endif /* VK_FORMAT_INFO_H */ diff --git a/src/mesa/state_tracker/st_pbo.c b/src/mesa/state_tracker/st_pbo.c index 65a1ce8862a..b03921c1be6 100644 --- a/src/mesa/state_tracker/st_pbo.c +++ b/src/mesa/state_tracker/st_pbo.c @@ -431,16 +431,21 @@ create_fs(struct st_context *st, bool download, nir_ssa_def *coord = nir_load_var(&b, fragcoord); nir_ssa_def *layer = NULL; - if (st->pbo.layers && need_layer && (!download || target == PIPE_TEXTURE_1D_ARRAY || - target == PIPE_TEXTURE_2D_ARRAY || - target == PIPE_TEXTURE_3D || - target == PIPE_TEXTURE_CUBE || - target == PIPE_TEXTURE_CUBE_ARRAY)) { - nir_variable *var = nir_variable_create(b.shader, nir_var_shader_in, - glsl_int_type(), "gl_Layer"); - var->data.location = VARYING_SLOT_LAYER; - var->data.interpolation = INTERP_MODE_FLAT; - layer = nir_load_var(&b, var); + if (st->pbo.layers && (!download || target == PIPE_TEXTURE_1D_ARRAY || + target == PIPE_TEXTURE_2D_ARRAY || + target == PIPE_TEXTURE_3D || + target == PIPE_TEXTURE_CUBE || + target == PIPE_TEXTURE_CUBE_ARRAY)) { + if (need_layer) { + nir_variable *var = nir_variable_create(b.shader, nir_var_shader_in, + glsl_int_type(), "gl_Layer"); + var->data.location = VARYING_SLOT_LAYER; + var->data.interpolation = INTERP_MODE_FLAT; + layer = nir_load_var(&b, var); + } + else { + layer = zero; + } } /* offset_pos = param.xy + f2i(coord.xy) */ diff --git a/src/util/format/u_format.csv b/src/util/format/u_format.csv index 8acfb869bdb..237c4c95475 100644 --- a/src/util/format/u_format.csv +++ b/src/util/format/u_format.csv @@ -500,7 +500,7 @@ PIPE_FORMAT_R4G4B4A4_UINT , plain, 1, 1, 1, up4 , up4 , up4 , up4 , xy PIPE_FORMAT_B4G4R4A4_UINT , plain, 1, 1, 1, up4 , up4 , up4 , up4 , zyxw, rgb, up4 , up4 , up4 , up4 , yzwx PIPE_FORMAT_A4R4G4B4_UINT , plain, 1, 1, 1, up4 , up4 , up4 , up4 , yzwx, rgb, up4 , up4 , up4 , up4 , zyxw PIPE_FORMAT_A4B4G4R4_UINT , plain, 1, 1, 1, up4 , up4 , up4 , up4 , wzyx, rgb, up4 , up4 , up4 , up4 , xyzw -PIPE_FORMAT_A1R5G5B5_UINT , plain, 1, 1, 1, up1 , up5 , up5 , up5 , wzyx, rgb, up5 , up5 , up5 , up1 , zyxw +PIPE_FORMAT_A1R5G5B5_UINT , plain, 1, 1, 1, up1 , up5 , up5 , up5 , yzwx, rgb, up5 , up5 , up5 , up1 , zyxw PIPE_FORMAT_A1B5G5R5_UINT , plain, 1, 1, 1, up1 , up5 , up5 , up5 , wzyx, rgb, up5 , up5 , up5 , up1 , xyzw PIPE_FORMAT_R5G5B5A1_UINT , plain, 1, 1, 1, up5 , up5 , up5 , up1 , xyzw, rgb, up5 , up5 , up5 , up1 , wzyx PIPE_FORMAT_B5G5R5A1_UINT , plain, 1, 1, 1, up5 , up5 , up5 , up1 , zyxw, rgb, up1 , up5 , up5 , up5 , yzwx diff --git a/src/vulkan/device-select-layer/VkLayer_MESA_device_select.json b/src/vulkan/device-select-layer/VkLayer_MESA_device_select.json index 1d5fffd0135..361ae9fe74e 100644 --- a/src/vulkan/device-select-layer/VkLayer_MESA_device_select.json +++ b/src/vulkan/device-select-layer/VkLayer_MESA_device_select.json @@ -4,7 +4,7 @@ "name": "VK_LAYER_MESA_device_select", "type": "GLOBAL", "library_path": "libVkLayer_MESA_device_select.so", - "api_version": "1.1.73", + "api_version": "1.2.73", "implementation_version": "1", "description": "Linux device selection layer", "functions": {