From be59b88b263e7587158d27aca628c916837946ba Mon Sep 17 00:00:00 2001 From: Chris Sarbora Date: Sat, 24 Aug 2024 00:33:41 -0500 Subject: [PATCH] Improve Renderer Performance Use the buffer-orphaning form of "buffer update streaming" to eliminate the synchronization delays, caused by CPU->GPU latency, that were slowing down the framerate. This sends vertex data to the GPU via partial updates to a buffer and reallocates the buffer once it fills up, ensuring that no synchronization is ever needed. The buffer is sized to balance memory usage vs allocation rate, and the GL driver ensures that "orphaned" buffers are only destroyed when all GL commands using them are retired. --- renderer/HardwareOpenGL.cpp | 31 +++++++++--------- renderer/ShaderProgram.h | 63 +++++++++++++++++++++++++++++++------ renderer/dyna_gl.h | 2 ++ 3 files changed, 70 insertions(+), 26 deletions(-) diff --git a/renderer/HardwareOpenGL.cpp b/renderer/HardwareOpenGL.cpp index cfa6d3ff..37e13527 100644 --- a/renderer/HardwareOpenGL.cpp +++ b/renderer/HardwareOpenGL.cpp @@ -111,16 +111,21 @@ struct Renderer { shader_.setUniform1i("u_texture_enable", texture_enable_); } - void setVertexData(size_t offset, size_t count, PosColorUV2Vertex const* vertices) { - shader_.setVertexData(offset, count, vertices); + template ::value_type, PosColorUV2Vertex>>> + size_t addVertexData(VertexIter begin, VertexIter end) { + return shader_.addVertexData(begin, end); } - void setVertexData(size_t offset, size_t count, PosColorUVVertex const* vertices) { + struct PosColorUVVertex_tag {}; + template ::value_type, PosColorUVVertex>>> + size_t addVertexData(VertexIter begin, VertexIter end, PosColorUVVertex_tag = {}) { std::array converted; - std::transform(vertices, vertices + count, converted.begin(), [](auto const& vtx) { + std::transform(begin, end, converted.begin(), [](auto const& vtx) { return PosColorUV2Vertex{vtx.pos, vtx.color, vtx.uv, {}}; }); - setVertexData(offset, count, converted.data()); + return shader_.addVertexData(converted.cbegin(), 
converted.cend()); } void setFogEnabled(bool enabled) { @@ -1332,8 +1337,6 @@ void gpu_BindTexture(int handle, int map_type, int slot) { } void gpu_RenderPolygon(PosColorUVVertex *vData, uint32_t nv) { - gRenderer->setVertexData(0, nv, vData); - if (gpu_state.cur_texture_quality == 0) { // force disable textures gRenderer->setTextureEnabled(0, false); @@ -1342,7 +1345,7 @@ void gpu_RenderPolygon(PosColorUVVertex *vData, uint32_t nv) { gRenderer->setTextureEnabled(1, false); // draw the data in the arrays - dglDrawArrays(GL_TRIANGLE_FAN, 0, nv); + dglDrawArrays(GL_TRIANGLE_FAN, gRenderer->addVertexData(vData, vData + nv), nv); if (gpu_state.cur_texture_quality == 0) { // re-enable textures @@ -1355,9 +1358,8 @@ void gpu_RenderPolygon(PosColorUVVertex *vData, uint32_t nv) { void gpu_RenderPolygonUV2(PosColorUV2Vertex *vData, uint32_t nv) { gRenderer->setTextureEnabled(1, true); - gRenderer->setVertexData(0, nv, vData); - dglDrawArrays(GL_TRIANGLE_FAN, 0, nv); + dglDrawArrays(GL_TRIANGLE_FAN, gRenderer->addVertexData(vData, vData + nv), nv); OpenGL_polys_drawn++; OpenGL_verts_processed += nv; @@ -1598,8 +1600,7 @@ void rend_SetPixel(ddgr_color color, int x, int y) { {}, {} }; - gRenderer->setVertexData(0, 1, &vtx); - dglDrawArrays(GL_POINTS, 0, 1); + dglDrawArrays(GL_POINTS, gRenderer->addVertexData(&vtx, &vtx + 1), 1); } // Sets a pixel on the display @@ -1645,8 +1646,7 @@ void rend_DrawLine(int x1, int y1, int x2, int y2) { } }; - gRenderer->setVertexData(0, vertices.size(), vertices.data()); - dglDrawArrays(GL_LINES, 0, vertices.size()); + dglDrawArrays(GL_LINES, gRenderer->addVertexData(vertices.begin(), vertices.end()), vertices.size()); rend_SetAlphaType(atype); rend_SetLighting(ltype); @@ -1726,8 +1726,7 @@ void rend_DrawSpecialLine(g3Point *p0, g3Point *p1) { {}}; }); - gRenderer->setVertexData(0, vertices.size(), vertices.data()); - dglDrawArrays(GL_LINES, 0, vertices.size()); + dglDrawArrays(GL_LINES, gRenderer->addVertexData(vertices.begin(), 
vertices.end()), vertices.size()); } // Takes a screenshot of the current frame and puts it into the handle passed diff --git a/renderer/ShaderProgram.h b/renderer/ShaderProgram.h index 75ba79c1..90ce5acb 100644 --- a/renderer/ShaderProgram.h +++ b/renderer/ShaderProgram.h @@ -54,15 +54,16 @@ template struct VertexBuffer { VertexBuffer(GLuint program, std::vector> attribs, - size_t vertex_count, + size_t vertexCount, GLenum bufferType, V const* initialData = nullptr) : vao_{outval(dglGenVertexArrays)}, vbo_{outval(dglGenBuffers)} { dglBindVertexArray(vao_); - dglBindBuffer(GL_ARRAY_BUFFER, vbo_); + + bind(); dglBufferData(GL_ARRAY_BUFFER, - vertex_count * sizeof(PosColorUV2Vertex), + vertexCount * sizeof(V), initialData, bufferType); @@ -78,11 +79,16 @@ struct VertexBuffer { } } - void UpdateData(size_t vtx_offset, size_t vtx_count, V const* vertices) const { - dglBindBuffer(GL_ARRAY_BUFFER, vbo_); + void UpdateData(size_t vtx_offset, size_t vtx_count, V const* vertices) { + bind(); dglBufferSubData(GL_ARRAY_BUFFER, vtx_offset * sizeof(V), vtx_count * sizeof(V), vertices); } +protected: + void bind() { + dglBindBuffer(GL_ARRAY_BUFFER, vbo_); + } + private: static void DeleteBuffer(GLuint id) { dglDeleteBuffers(1, &id); @@ -101,6 +107,41 @@ private: MoveOnlyHolder vbo_; }; +// https://www.khronos.org/opengl/wiki/Buffer_Object_Streaming#Buffer_update +template +struct OrphaningVertexBuffer : VertexBuffer { + OrphaningVertexBuffer(GLuint program, std::vector> attribs) + : VertexBuffer{program, std::move(attribs), kVertexCount, kBufferType} {} + + template ::value_type, V>>> + size_t AddVertexData(VertexIter begin, VertexIter end) { + this->bind(); + + auto dist = std::distance(begin, end); + if (nextVertex_ + dist >= kVertexCount) { + dglBufferData(GL_ARRAY_BUFFER, + kVertexCount * sizeof(V), + nullptr, + kBufferType); + nextVertex_ = 0; + } + + auto start = nextVertex_; + V* mapped = reinterpret_cast(dglMapBufferRange(GL_ARRAY_BUFFER, start * sizeof(V), dist * 
sizeof(V), GL_MAP_WRITE_BIT | GL_MAP_UNSYNCHRONIZED_BIT)); + std::copy(begin, end, mapped); + dglUnmapBuffer(GL_ARRAY_BUFFER); + + nextVertex_ += dist; + return start; + } + +private: + static constexpr size_t kVertexCount{1 << 16}; + static constexpr GLenum kBufferType{GL_STREAM_DRAW}; + size_t nextVertex_{}; +}; + template struct Shader { static_assert(kType == GL_VERTEX_SHADER || kType == GL_FRAGMENT_SHADER); @@ -141,7 +182,7 @@ private: template struct ShaderProgram { explicit ShaderProgram(std::string_view vertexSrc, std::string_view fragmentSrc, std::vector> attribs) - : id_{dglCreateProgram()}, vertex_{vertexSrc}, fragment_{fragmentSrc}, vbo_{id_, std::move(attribs), MAX_POINTS_IN_POLY, GL_DYNAMIC_DRAW} { + : id_{dglCreateProgram()}, vertex_{vertexSrc}, fragment_{fragmentSrc}, vbo_{id_, std::move(attribs)} { if (id_ == 0) { throw std::runtime_error("error creating GL program"); } @@ -170,8 +211,10 @@ struct ShaderProgram { dglUseProgram(0); } - void setVertexData(size_t offset, size_t count, PosColorUV2Vertex const* vertices) { - vbo_.UpdateData(offset, count, vertices); + template ::value_type, V>>> + size_t addVertexData(VertexIter begin, VertexIter end) { + return vbo_.AddVertexData(begin, end); } void setUniformMat4f(std::string const& name, glm::mat4x4 const& matrix) { @@ -212,6 +255,6 @@ private: MoveOnlyHolder id_; Shader vertex_; Shader fragment_; - VertexBuffer vbo_; + OrphaningVertexBuffer vbo_; std::unordered_map uniform_cache_; -}; \ No newline at end of file +}; diff --git a/renderer/dyna_gl.h b/renderer/dyna_gl.h index 8d2dbed6..2f12c830 100644 --- a/renderer/dyna_gl.h +++ b/renderer/dyna_gl.h @@ -169,6 +169,7 @@ DYNAEXTERN(glGetString); DYNAEXTERN(glGetStringi); DYNAEXTERN(glGetUniformLocation); DYNAEXTERN(glLinkProgram); +DYNAEXTERN(glMapBufferRange); DYNAEXTERN(glPixelStorei); DYNAEXTERN(glPolygonOffset); DYNAEXTERN(glReadPixels); @@ -181,6 +182,7 @@ DYNAEXTERN(glUniform1f); DYNAEXTERN(glUniform1i); DYNAEXTERN(glUniform4f); 
DYNAEXTERN(glUniformMatrix4fv); +DYNAEXTERN(glUnmapBuffer); DYNAEXTERN(glUseProgram); DYNAEXTERN(glVertexAttribPointer); DYNAEXTERN(glViewport);