#define DX11_MAX_TEXTURE_COUNT 32 struct DX11Texture { ID3D11Texture2D* pointer; ID3D11ShaderResourceView* view; }; struct GL_Program { ID3D11VertexShader* vertex; ID3D11InputLayout* layout; ID3D11PixelShader* pixel; b8 valid; }; struct DX11 { b8 initialized; ID3D11Device1* device; ID3D11DeviceContext1* context; IDXGISwapChain1* swap_chain; ID3D11SamplerState* sampler; ID3D11RenderTargetView* render_target_view; GL_Program gpu_program; ID3D11Buffer* vertex_buffer; ID3D11Buffer* constants_buffer; // NOTE(simon, 28/02/24): To keep the API the same since the OpenGL texture handle are store in // other places than the graphics parts (e.g. in the font Face struct), we create an array of // textures, and use the indices as texture handles. DX11Texture textures[ DX11_MAX_TEXTURE_COUNT + 1 ]; // NOTE(simon, 28/02/24): The first slot in the array should not be used so we can consider an // index of 0 to be invalid. OpenGL should not return 0 for texture handle, so we sort of do // the same. u32 texture_count; }; global DX11 g_dx11 = { }; // NOTE(simon, 28/02/24): Passing 0 for texid use the reserved texture in the array, and passing a // resource view of zero unbinds the resource. 
// Binds the shader resource view of texture slot `texid` to pixel shader slot 0, unless that
// texture is already the one recorded in `t->bound_texture`. Slot 0 holds a null view, so binding
// it unbinds the resource.
internal void gl__bind_texture(Render_Target *t, i32 texid){
    // Texture handles are indices in g_dx11.textures; anything else would read out of bounds.
    Assert( texid >= 0 && ( u32 ) texid < ArrayCount( g_dx11.textures ) );

    if (t->bound_texture != texid){
        DX11Texture* texture = g_dx11.textures + texid;
        g_dx11.context->PSSetShaderResources( 0, 1, &texture->view );
        t->bound_texture = texid;
    }
}

// Makes sure some texture is bound: if nothing is bound (bound_texture == 0), binds the fallback
// texture of the render target.
internal void gl__bind_any_texture(Render_Target *t){
    if (t->bound_texture == 0){
        Assert(t->fallback_texture_id != 0);
        DX11Texture* texture = g_dx11.textures + t->fallback_texture_id;
        g_dx11.context->PSSetShaderResources( 0, 1, &texture->view );
        t->bound_texture = t->fallback_texture_id;
    }
}

// Creates a zero-filled A8 texture array of `dim.x` by `dim.y` texels with `dim.z` slices, along
// with its shader resource view, and stores both in a free slot of g_dx11.textures.
// `texture_kind` is not used: the format is always DXGI_FORMAT_A8_UNORM.
// Returns the slot index (the texture handle), or 0 when the dimensions are not all positive, no
// slot is available, or the Direct3D objects could not be created.
internal u32 gl__get_texture(Vec3_i32 dim, Texture_Kind texture_kind){
    
    u32 texid = 0;
    
    // Direct3D rejects zero sized textures, and non positive values would produce nonsensical
    // allocation sizes below, so fail before consuming a slot or touching the arena.
    if ( dim.x <= 0 || dim.y <= 0 || dim.z <= 0 ) {
        return(texid);
    }
    
    if ( g_dx11.texture_count < ArrayCount( g_dx11.textures ) ) {
        // There are still slots that were never handed out.
        texid = g_dx11.texture_count;
        g_dx11.texture_count++;
    } else {
        // Every slot was handed out at least once: look for one that has been freed since.
        for ( u32 i = 1; i < g_dx11.texture_count; i++ ) {
            DX11Texture* texture = g_dx11.textures + i;
            if ( !texture->pointer && !texture->view ) {
                texid = i;
                break;
            }
        }
    }
    
    if ( texid ) {
        
        DX11Texture* texture = g_dx11.textures + texid;
        Assert( texture->pointer == 0 );
        Assert( texture->view == 0 );
        
        D3D11_TEXTURE2D_DESC texture_desc = { 0 };
        texture_desc.Width = dim.x;
        texture_desc.Height = dim.y;
        texture_desc.MipLevels = 1;
        texture_desc.ArraySize = dim.z;
        texture_desc.Format = DXGI_FORMAT_A8_UNORM;
        texture_desc.SampleDesc.Count = 1;
        texture_desc.Usage = D3D11_USAGE_DEFAULT;
        texture_desc.BindFlags = D3D11_BIND_SHADER_RESOURCE;
        texture_desc.CPUAccessFlags = 0; // D3D11_CPU_ACCESS_WRITE;
        
        // NOTE(simon, 28/02/24): I initialize the texture with zeros. In practice it doesn't seem
        // to matter, but since the shader uses a bilinear filter, the uninitialized data in the
        // texture could change the result of the filtering for texels at the edge of a character.
        // I did some tests with the rectangle packer to have a border around characters but got
        // the exact same render, so it doesn't matter much.
        
        // Every slice points at the same zeroed block of dim.x * dim.y bytes.
        D3D11_SUBRESOURCE_DATA* texture_data = push_array_zero( &win32vars.frame_arena, D3D11_SUBRESOURCE_DATA, dim.z );
        u8* initial_data = push_array_zero( &win32vars.frame_arena, u8, dim.x * dim.y );
        
        for ( i32 i = 0; i < dim.z; i++ ) {
            texture_data[ i ].pSysMem = initial_data;
            texture_data[ i ].SysMemPitch = dim.x;
        }
        
        HRESULT hr = g_dx11.device->CreateTexture2D( &texture_desc, texture_data, &texture->pointer );
        
        // The temporary data is released in the reverse order of its allocation.
        pop_array( &win32vars.frame_arena, u8, dim.x * dim.y );
        pop_array( &win32vars.frame_arena, D3D11_SUBRESOURCE_DATA, dim.z );
        
        if ( SUCCEEDED( hr ) ) {
            hr = g_dx11.device->CreateShaderResourceView( ( ID3D11Resource* ) texture->pointer, 0, &texture->view );
        }
        
        if ( FAILED( hr ) ) {
            
            // NOTE(simon, 28/02/24): When we fail, we don't decrement the texture count, but the
            // loop at the beginning of the function will reuse the slot once every slot has been
            // handed out.
            texid = 0;
            
            if ( texture->pointer ) {
                texture->pointer->Release( );
                texture->pointer = 0;
            }
            
            if ( texture->view ) {
                texture->view->Release( );
                texture->view = 0;
            }
        }
    }
    
    return(texid);
}

// Copies `data` (tightly packed, dim.x bytes per row) into the rectangle starting at (p.x, p.y)
// and of size (dim.x, dim.y), in array slice p.z of texture `texid`. Only that single slice is
// written, whatever the value of dim.z is (as long as it's positive).
// Nothing is done when the handle doesn't refer to a live texture, `data` is null or a dimension
// is not positive. Always returns false.
internal b32 gl__fill_texture(Texture_Kind texture_kind, u32 texid, Vec3_i32 p, Vec3_i32 dim, void *data){
    
    // NOTE(simon, 28/02/24): The OpenGL version always returns false.
    b32 result = false;
    
    // NOTE(simon, 28/02/24): In the OpenGL version, if we pass zero as texture handle, the
    // function works on the currently bound texture. In directx we need to get the texture
    // pointer. We could retrieve that from Render_Target->bound_texture, but we don't have that
    // as a parameter to this function and don't want to change the signature since it's used by
    // the font rendering code and other platforms. Fortunately the only call that specified 0 for
    // the texture handle was for the creation of the fallback texture in gl_render, and we can
    // modify that call to pass the fallback texture handle.
    Assert( texid != 0 );
    
    b32 valid_handle = ( texid < ArrayCount( g_dx11.textures ) );
    
    if (valid_handle && data != 0 && dim.x > 0 && dim.y > 0 && dim.z > 0){
        
        DX11Texture* texture = g_dx11.textures + texid;
        
        // The reserved slot and the slots of freed (or never created) textures have no resource.
        // UpdateSubresource requires a valid destination, so skip the upload in that case.
        if ( texture->pointer ) {
            
            D3D11_BOX box = { };
            box.left = p.x;
            box.right = p.x + dim.x;
            box.top = p.y;
            box.bottom = p.y + dim.y;
            box.front = 0;
            box.back = 1;
            
            u32 sub_resource_index = D3D11CalcSubresource( 0 /* MipSlice */, p.z /* ArraySlice */, 1 /* MipLevels */ );
            g_dx11.context->UpdateSubresource( texture->pointer, sub_resource_index, &box, data, dim.x, dim.x * dim.y );
        }
    }
    
    return(result);
}

// Releases the view and the resource of texture `texid` and marks the slot as free so that
// gl__get_texture can reuse it. A handle of 0 or a handle outside of the table is ignored.
internal void gl__free_texture( u32 texid ) {
    
    if ( texid && texid < ArrayCount( g_dx11.textures ) ) {
        
        DX11Texture* texture = g_dx11.textures + texid;
        
        if ( texture->view ) {
            texture->view->Release( );
            texture->view = 0;
        }
        
        if ( texture->pointer ) {
            texture->pointer->Release( );
            texture->pointer = 0;
        }
    }
}

// HLSL source of the vertex shader. It transforms the vertex position with the constants
// (translation by -view_t, then multiplication by view_m), swizzles the BGRA vertex color to RGBA
// and forwards the values the pixel shader needs to evaluate the rectangle shape.
char *gl__vertex = R"foo(

// NOTE(simon, 28/02/24): The layout of this is (constants are store in 16 bytes vectors (4 floats))
// vector1: view_m._11, view_m._12, 0, 0
// vector2: view_m._21, view_m._22, view_t.x, view_t.y
cbuffer constants : register( b0 ) {
    row_major float2x2 view_m;
    float2 view_t;
}

struct input_t {
    float2 vertex_p : POSITION;
    float3 vertex_t : UVW;
    float4 vertex_c : COLOR;
    float vertex_ht : THICKNESS;
};

struct output_t {
    float4 position : SV_POSITION;
    float4 color : COLOR;
    float3 uvw : UVW;
    float2 xy : XY;
    float2 adjusted_half_dim: HALF_DIM;
    float half_thickness : THICKNESS;
};

output_t main(input_t input) {
    
    output_t output;
    
    output.position = float4( mul( view_m, ( input.vertex_p - view_t ) ), 0.0, 1.0 );
    // NOTE(simon, 28/02/24): The input colors are BGRA, we need them as RGBA.
    output.color = input.vertex_c.zyxw;
    output.uvw = input.vertex_t;
    output.xy = input.vertex_p;
    output.half_thickness = input.vertex_ht;
    
    float2 center = input.vertex_t.xy;
    float2 half_dim = abs( input.vertex_p - center );
    output.adjusted_half_dim = half_dim - input.vertex_t.zz + float2( 0.5, 0.5 );
    
    return output;
}
)foo";

// HLSL source of the pixel shader. When half_thickness is below 0.49 the alpha comes from the
// texture array (uvw are texture coordinates); otherwise it comes from the signed distance to a
// rounded rectangle (uvw.xy is the center, uvw.z the roundness).
char *gl__fragment = R"foo(

struct input_t {
    float4 position : SV_POSITION;
    float4 color : COLOR;
    float3 uvw : UVW;
    float2 xy : XY;
    float2 adjusted_half_dim: HALF_DIM;
    float half_thickness : THICKNESS;
};

Texture2DArray alpha : register( t0 );
SamplerState alpha_sampler : register( s0 );

float rectangle_sd( float2 p, float2 b ) {
    
    float2 d = abs( p ) - b;
    return( length( max( d, float2( 0.0, 0.0 ) ) ) + min( max( d.x, d.y ), 0.0 ) );
}

float4 main( input_t input ) : SV_TARGET {
    
    float has_thickness = step( 0.49, input.half_thickness );
    float does_not_have_thickness = 1.0 - has_thickness;
    
    float sample_value = alpha.Sample( alpha_sampler, input.uvw ).a;
    sample_value *= does_not_have_thickness;
    
    float2 center = input.uvw.xy;
    float roundness = input.uvw.z;
    float sd = rectangle_sd( input.xy - center, input.adjusted_half_dim );
    sd = sd - roundness;
    sd = abs( sd + input.half_thickness ) - input.half_thickness;
    float shape_value = 1.0 - smoothstep(-1.0, 0.0, sd);
    shape_value *= has_thickness;
    
    float4 result = float4( input.color.xyz, input.color.a * ( sample_value + shape_value ) );
    return result;
}
)foo";

// NOTE(simon, 28/02/24): This function is not generic. It can compile any shader, but the vertex
// input layout is fixed. 4coder only has one vertex format and shader, so we could remove this
// function and move its content in the win32_gl_create_window. I removed the header parameter as
// it's not useful in directx.
// Compiles the HLSL sources `vertex` and `pixel` (null terminated, entry point "main", profiles
// vs_5_0 and ps_5_0) and creates the vertex shader, the fixed input layout (POSITION, UVW, COLOR,
// THICKNESS) and the pixel shader.
// On success the returned program owns the three objects and has `valid` set to true. On any
// failure the error is logged, everything created so far is released, an error popup is shown and
// the returned program is zeroed.
internal GL_Program gl__make_program( char* vertex, char* pixel ) {
    
    GL_Program result = { };
    
    // Lengths of the sources, terminator excluded. A null source has a length of 0.
    u32 vertex_length = 0;
    
    while ( vertex && vertex[ vertex_length ] != 0 ) {
        vertex_length++;
    }
    
    u32 pixel_length = 0;
    
    while ( pixel && pixel[ pixel_length ] != 0 ) {
        pixel_length++;
    }
    
    ID3DBlob* vs_blob = 0;
    ID3DBlob* vs_error_blob = 0;
    ID3D11VertexShader* vertex_shader = 0;
    ID3D11InputLayout* input_layout = 0;
    
    ID3DBlob* ps_blob = 0;
    ID3DBlob* ps_error_blob = 0;
    ID3D11PixelShader* pixel_shader = 0;
    
    // The do/while(0) lets every failure `break` to the common cleanup code below.
    do {
        
        HRESULT hr = D3DCompile( vertex, vertex_length, 0, 0, 0, "main", "vs_5_0", 0, 0, &vs_blob, &vs_error_blob );
        
        if ( FAILED( hr ) ) {
            log_os( "Failed to compile vertex shader.\n" );
            
            if ( vs_error_blob ) {
                u8* error_message = ( u8* ) vs_error_blob->GetBufferPointer( );
                u32 length = ( u32 ) vs_error_blob->GetBufferSize( );
                log_os( "vertex shader error:\n%.*s\n", length, error_message );
            }
            
            break;
        }
        
        hr = g_dx11.device->CreateVertexShader( vs_blob->GetBufferPointer( ), vs_blob->GetBufferSize( ), 0, &vertex_shader );
        
        if ( FAILED( hr ) ) {
            log_os( "Failed to create a vertex shader.\n" );
            break;
        }
        
        // The elements are tightly packed in a single vertex buffer, in the same order as the
        // `input_t` struct of the vertex shader.
        D3D11_INPUT_ELEMENT_DESC layout_desc[ 4 ] = { };
        
        layout_desc[ 0 ].SemanticName = "POSITION";
        layout_desc[ 0 ].Format = DXGI_FORMAT_R32G32_FLOAT;
        layout_desc[ 0 ].AlignedByteOffset = 0;
        layout_desc[ 0 ].InputSlotClass = D3D11_INPUT_PER_VERTEX_DATA;
        
        layout_desc[ 1 ].SemanticName = "UVW";
        layout_desc[ 1 ].Format = DXGI_FORMAT_R32G32B32_FLOAT;
        layout_desc[ 1 ].AlignedByteOffset = D3D11_APPEND_ALIGNED_ELEMENT;
        layout_desc[ 1 ].InputSlotClass = D3D11_INPUT_PER_VERTEX_DATA;
        
        layout_desc[ 2 ].SemanticName = "COLOR";
        layout_desc[ 2 ].Format = DXGI_FORMAT_R8G8B8A8_UNORM;
        layout_desc[ 2 ].AlignedByteOffset = D3D11_APPEND_ALIGNED_ELEMENT;
        layout_desc[ 2 ].InputSlotClass = D3D11_INPUT_PER_VERTEX_DATA;
        
        layout_desc[ 3 ].SemanticName = "THICKNESS";
        layout_desc[ 3 ].Format = DXGI_FORMAT_R32_FLOAT;
        layout_desc[ 3 ].AlignedByteOffset = D3D11_APPEND_ALIGNED_ELEMENT;
        layout_desc[ 3 ].InputSlotClass = D3D11_INPUT_PER_VERTEX_DATA;
        
        hr = g_dx11.device->CreateInputLayout( layout_desc, ArrayCount( layout_desc ), vs_blob->GetBufferPointer( ), vs_blob->GetBufferSize( ), &input_layout );
        
        if ( FAILED( hr ) ) {
            log_os( "Failed to create input layout.\n" );
            break;
        }
        
        hr = D3DCompile( pixel, pixel_length, 0, 0, 0, "main", "ps_5_0", 0, 0, &ps_blob, &ps_error_blob );
        
        if ( FAILED( hr ) ) {
            log_os( "Failed to compile pixel shader.\n" );
            
            if ( ps_error_blob ) {
                u8* error_message = ( u8* ) ps_error_blob->GetBufferPointer( );
                u32 length = ( u32 ) ps_error_blob->GetBufferSize( );
                log_os( "pixel shader error:\n%.*s\n", length, error_message );
            }
            
            break;
        }
        
        hr = g_dx11.device->CreatePixelShader( ps_blob->GetBufferPointer( ), ps_blob->GetBufferSize( ), 0, &pixel_shader );
        
        if ( FAILED( hr ) ) {
            log_os( "Failed to create a pixel shader.\n" );
            break;
        }
        
        result.vertex = vertex_shader;
        result.layout = input_layout;
        result.pixel = pixel_shader;
        result.valid = true;
        
    } while ( 0 );
    
    // The blobs are only needed during creation, so they are released on every path.
    if ( vs_blob ) {
        vs_blob->Release( );
        vs_blob = 0;
    }
    
    if ( vs_error_blob ) {
        vs_error_blob->Release( );
        vs_error_blob = 0;
    }
    
    if ( ps_blob ) {
        ps_blob->Release( );
        ps_blob = 0;
    }
    
    if ( ps_error_blob ) {
        ps_error_blob->Release( );
        ps_error_blob = 0;
    }
    
    if ( !result.valid ) {
        
        // Release whatever was created before the failure.
        if ( vertex_shader ) {
            vertex_shader->Release( );
            vertex_shader = 0;
        }
        
        if ( input_layout ) {
            input_layout->Release( );
            input_layout = 0;
        }
        
        if ( pixel_shader ) {
            pixel_shader->Release( );
            pixel_shader = 0;
        }
        
        os_popup_error( "Error", "Shader compilation failed." );
    }
    
    return result;
}

// Renders one frame from the content of the render target `t`: sets up the pipeline state (once,
// on the first call), clears the back buffer, uploads the view constants, frees the textures
// queued in `t->free_texture_first`, then for each render group uploads its vertices in the
// vertex buffer (growing it when needed) and issues one draw call.
internal void gl_render(Render_Target *t){
    Font_Set *font_set = (Font_Set*)t->font_set;
    
    local_persist b32 first_call = true;
    if (first_call){
        
        // NOTE(simon, 28/02/24): Most of the code here has been moved in win32_gl_create_window
        // because if that code fails we should exit the application directly.
        first_call = false;
        
        u32 stride = sizeof( Render_Vertex );
        u32 offset = 0;
        
        g_dx11.context->IASetVertexBuffers( 0, 1, &g_dx11.vertex_buffer, &stride, &offset );
        g_dx11.context->IASetInputLayout( g_dx11.gpu_program.layout );
        g_dx11.context->IASetPrimitiveTopology( D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST );
        
        g_dx11.context->VSSetShader( g_dx11.gpu_program.vertex, 0, 0 );
        g_dx11.context->VSSetConstantBuffers( 0, 1, &g_dx11.constants_buffer );
        
        g_dx11.context->PSSetShader( g_dx11.gpu_program.pixel, 0, 0 );
        g_dx11.context->PSSetSamplers( 0, 1, &g_dx11.sampler );
        
        {
            t->fallback_texture_id = gl__get_texture(V3i32(2, 2, 1), TextureKind_Mono);
            u8 white_block[] = { 0xFF, 0xFF, 0xFF, 0xFF, };
            
            // NOTE(simon, 28/02/24): Passing the fallback texture, because we can't rely on the
            // fact that gl__get_texture has bound the fallback texture.
            // gl__get_texture returns 0 on failure, and 0 isn't a texture we can fill.
            if ( t->fallback_texture_id != 0 ) {
                gl__fill_texture(TextureKind_Mono, t->fallback_texture_id, V3i32(0, 0, 0), V3i32(2, 2, 1), white_block);
            }
        }
    }
    
    // NOTE(simon, 28/02/24): OMSetRenderTargets needs to be set each frame when using a FLIP swap
    // chain.
    g_dx11.context->OMSetRenderTargets( 1, &g_dx11.render_target_view, 0 );
    
    i32 width = t->width;
    i32 height = t->height;
    
    // NOTE(simon, 28/02/24): Viewport (0, 0) is top left in directx. Important for viewport and
    // scissor calls.
    
    D3D11_VIEWPORT viewport = {
        0, // TopLeftX
        0, // TopLeftY
        ( float ) width, // Width
        ( float ) height, // Height
        0, // MinDepth
        1 // MaxDepth
    };
    
    g_dx11.context->RSSetViewports( 1, &viewport );
    
    D3D11_RECT scissor = {
        0, // left
        0, // top
        width, // right
        height // bottom
    };
    
    g_dx11.context->RSSetScissorRects( 1, &scissor );
    
    float magenta[ 4 ] = { 1.0f, 0.0f, 1.0f, 1.0f };
    g_dx11.context->ClearRenderTargetView( g_dx11.render_target_view, magenta );
    
    // NOTE(simon, 28/02/24): The constants (uniforms) were set in the render loop in the OpenGL
    // version. But since they don't vary between draw calls I moved the code before the render
    // loop.
    D3D11_MAPPED_SUBRESOURCE constants_map = { };
    HRESULT hr = g_dx11.context->Map( ( ID3D11Resource* ) g_dx11.constants_buffer, 0, D3D11_MAP_WRITE_DISCARD, 0, &constants_map );
    
    // NOTE(simon, 28/02/24): The layout of the constants buffer was a bit confusing. This link
    // explains a little about how data is laid out:
    // https://learn.microsoft.com/en-us/windows/win32/direct3dhlsl/dx-graphics-hlsl-packing-rules
    // The article doesn't explain anything about matrices. What I found out while making this work
    // is that each row or column (depending on if we use column or row major matrices) of a matrix
    // needs to start on a new 16 bytes vector. For a 2 by 2 matrix, this means that there are two
    // register elements at the end of the first vector that aren't used.
    // Another thing is that the second vector only needs the first two elements for the matrix,
    // so the two elements we want to put next can be in the same vector.
    
    // If Map fails there is no memory to write to (pData stays null), so the upload is skipped and
    // the buffer is left untouched instead of writing through a null pointer.
    if ( SUCCEEDED( hr ) && constants_map.pData != 0 ) {
        
        // NOTE(simon, 28/02/24): The code here could be shorter, but I prefer to make it clear
        // what's happening.
        f32 view_m[ 4 ] = {
            2.0f / width, 0,
            0, -2.0f / height
        };
        f32 view_t[ 2 ] = { width / 2.0f, height / 2.0f };
        
        f32* vector_1 = ( f32* ) constants_map.pData;
        f32* vector_2 = vector_1 + 4;
        
        vector_1[ 0 ] = view_m[ 0 ];
        vector_1[ 1 ] = view_m[ 1 ];
        vector_1[ 2 ] = 0; // Padding
        vector_1[ 3 ] = 0; // Padding
        
        vector_2[ 0 ] = view_m[ 2 ];
        vector_2[ 1 ] = view_m[ 3 ];
        vector_2[ 2 ] = view_t[ 0 ];
        vector_2[ 3 ] = view_t[ 1 ];
        
        g_dx11.context->Unmap( ( ID3D11Resource* ) g_dx11.constants_buffer, 0 );
    }
    
    // Unbind the current texture before releasing the textures queued for deletion.
    gl__bind_texture( t, 0 );
    
    for (Render_Free_Texture *free_texture = t->free_texture_first;
         free_texture != 0;
         free_texture = free_texture->next){
        
        gl__free_texture( free_texture->tex_id );
    }
    
    t->free_texture_first = 0;
    t->free_texture_last = 0;
    
    // Used to know the current capacity (ByteWidth) of the vertex buffer.
    D3D11_BUFFER_DESC vertex_buffer_desc = { };
    g_dx11.vertex_buffer->GetDesc( &vertex_buffer_desc );
    
    for (Render_Group *group = t->group_first;
         group != 0;
         group = group->next){
        
        Rect_i32 box = Ri32(group->clip_box);
        
        D3D11_RECT group_scissor = { };
        group_scissor.left = box.x0;
        group_scissor.right = box.x1;
        group_scissor.top = box.y0;
        group_scissor.bottom = box.y1;
        
        g_dx11.context->RSSetScissorRects( 1, &group_scissor );
        
        i32 vertex_count = group->vertex_list.vertex_count;
        if (vertex_count > 0){
            Face *face = font_set_face_from_id(font_set, group->face_id);
            if (face != 0){
                gl__bind_texture(t, face->texture);
            }
            else{
                gl__bind_any_texture(t);
            }
            
            // NOTE(simon, 29/03/24): 4coder doesn't appear to clip characters outside the screen
            // horizontally. Even with line wrapping enabled, you can have cases where the line
            // won't wrap, for example "{0,0,0,0,...}" with a lot of zeros and no space will not
            // wrap. The consequence of that is that we might send a lot of vertex data that's
            // offscreen and the assumption about the vertex buffer size I made can be wrong.
            // So in this loop we release the previous vertex buffer and create a new one when
            // necessary.
            u32 size_required = vertex_count * sizeof( Render_Vertex );
            
            if ( size_required > vertex_buffer_desc.ByteWidth ) {
                
                u32 new_size = vertex_buffer_desc.ByteWidth * 2;
                
                while ( new_size < size_required ) {
                    new_size *= 2;
                }
                
                // NOTE(simon, 29/03/24): Create a new buffer and only release the previous one if
                // the creation succeeded. If the creation fails, we skip this vertex group, which
                // means the user will see an empty panel, but at least we won't stop rendering.
                D3D11_BUFFER_DESC new_vertex_buffer_desc = vertex_buffer_desc;
                new_vertex_buffer_desc.ByteWidth = new_size;
                ID3D11Buffer* new_vertex_buffer = 0;
                hr = g_dx11.device->CreateBuffer( &new_vertex_buffer_desc, 0, &new_vertex_buffer );
                
                if ( FAILED( hr ) ) {
                    continue;
                }
                
                g_dx11.vertex_buffer->Release( );
                g_dx11.vertex_buffer = new_vertex_buffer;
                vertex_buffer_desc.ByteWidth = new_size;
                
                // The input assembler still references the released buffer: bind the new one.
                u32 stride = sizeof( Render_Vertex );
                u32 offset = 0;
                g_dx11.context->IASetVertexBuffers( 0, 1, &g_dx11.vertex_buffer, &stride, &offset );
            }
            
            // NOTE(simon, 28/02/24): We fill the buffer, draw what we filled and then do the next
            // group, which allows to always start drawing from vertex 0. Alternatively we could
            // do a pass to fill the vertex buffer completely so we only map the vertex buffer
            // once, and then a second pass that just executes the draw calls. It doesn't seem
            // necessary since we have less than 10 draw calls.
            
            D3D11_MAPPED_SUBRESOURCE vertex_map = { };
            hr = g_dx11.context->Map( ( ID3D11Resource* ) g_dx11.vertex_buffer, 0, D3D11_MAP_WRITE_DISCARD, 0, &vertex_map );
            
            if ( FAILED( hr ) ) {
                // NOTE(simon, 28/02/24): It's improbable that Map will fail, but if it does we
                // just stop rendering, and we'll try on the next frame. We could just skip the
                // group and try with the next (using 'continue' instead of 'break'), but Map would
                // probably fail again. Waiting for the next frame "might" work. I don't really
                // know. We could also just exit the application assuming we won't be able to
                // render anything.
                break;
            }
            
            // Copy the vertex arrays of the group one after the other in the mapped buffer.
            u8* bytes = ( u8* ) vertex_map.pData;
            
            for (Render_Vertex_Array_Node *node = group->vertex_list.first;
                 node != 0;
                 node = node->next){
                
                i32 size = node->vertex_count*sizeof(*node->vertices);
                memcpy( bytes, node->vertices, size );
                bytes += size;
            }
            
            g_dx11.context->Unmap( ( ID3D11Resource* ) g_dx11.vertex_buffer, 0 );
            
            g_dx11.context->Draw( vertex_count, 0 );
        }
    }
}