Improve Renderer Performance

Use "buffer update streaming" with buffer orphaning to eliminate the
synchronization delays (caused by CPU->GPU latency) that were causing
framerate slowdowns. Vertex data is sent to the GPU via partial updates
to a buffer, and the buffer is reallocated once it fills up, ensuring
that no synchronization is ever needed. The buffer is sized to balance
memory usage vs allocation rate, and the GL driver ensures that
"orphaned" buffers are only destroyed when all GL commands using them
are retired.
This commit is contained in:
Chris Sarbora 2024-08-24 00:33:41 -05:00
parent d91f4357f6
commit be59b88b26
No known key found for this signature in database
3 changed files with 70 additions and 26 deletions

View File

@ -111,16 +111,21 @@ struct Renderer {
shader_.setUniform1i("u_texture_enable", texture_enable_);
}
void setVertexData(size_t offset, size_t count, PosColorUV2Vertex const* vertices) {
shader_.setVertexData(offset, count, vertices);
template <typename VertexIter,
typename = std::enable_if_t<std::is_same_v<typename std::iterator_traits<VertexIter>::value_type, PosColorUV2Vertex>>>
size_t addVertexData(VertexIter begin, VertexIter end) {
return shader_.addVertexData(begin, end);
}
void setVertexData(size_t offset, size_t count, PosColorUVVertex const* vertices) {
// Tag type whose only purpose is to give the PosColorUVVertex overload below a signature that
// differs from the PosColorUV2Vertex overload (they would otherwise differ only in a default
// template argument, which is not enough to make them distinct declarations).
struct PosColorUVVertex_tag {};

// Converts the single-UV vertices in [begin, end) to PosColorUV2Vertex (second UV set
// value-initialized), uploads them through the shader program, and returns the start index
// the shader reports for the uploaded data.
//
// Only the vertices that were actually converted are uploaded; the unused tail of the
// scratch array is not sent to the GPU.
//
// NOTE(review): the scratch array holds MAX_POINTS_IN_POLY entries and nothing here checks
// that the input range fits -- callers are presumably bounded by that limit; confirm.
template <typename VertexIter,
          typename = std::enable_if_t<std::is_same_v<typename std::iterator_traits<VertexIter>::value_type, PosColorUVVertex>>>
size_t addVertexData(VertexIter begin, VertexIter end, PosColorUVVertex_tag = {}) {
  std::array<PosColorUV2Vertex, MAX_POINTS_IN_POLY> converted;
  // std::transform returns the iterator one past the last element written, which marks the
  // end of the valid data inside `converted`.
  auto const convertedEnd = std::transform(begin, end, converted.begin(), [](auto const& vtx) {
    return PosColorUV2Vertex{vtx.pos, vtx.color, vtx.uv, {}};
  });
  return shader_.addVertexData(converted.begin(), convertedEnd);
}
void setFogEnabled(bool enabled) {
@ -1332,8 +1337,6 @@ void gpu_BindTexture(int handle, int map_type, int slot) {
}
void gpu_RenderPolygon(PosColorUVVertex *vData, uint32_t nv) {
gRenderer->setVertexData(0, nv, vData);
if (gpu_state.cur_texture_quality == 0) {
// force disable textures
gRenderer->setTextureEnabled(0, false);
@ -1342,7 +1345,7 @@ void gpu_RenderPolygon(PosColorUVVertex *vData, uint32_t nv) {
gRenderer->setTextureEnabled(1, false);
// draw the data in the arrays
dglDrawArrays(GL_TRIANGLE_FAN, 0, nv);
dglDrawArrays(GL_TRIANGLE_FAN, gRenderer->addVertexData(vData, vData + nv), nv);
if (gpu_state.cur_texture_quality == 0) {
// re-enable textures
@ -1355,9 +1358,8 @@ void gpu_RenderPolygon(PosColorUVVertex *vData, uint32_t nv) {
void gpu_RenderPolygonUV2(PosColorUV2Vertex *vData, uint32_t nv) {
gRenderer->setTextureEnabled(1, true);
gRenderer->setVertexData(0, nv, vData);
dglDrawArrays(GL_TRIANGLE_FAN, 0, nv);
dglDrawArrays(GL_TRIANGLE_FAN, gRenderer->addVertexData(vData, vData + nv), nv);
OpenGL_polys_drawn++;
OpenGL_verts_processed += nv;
@ -1598,8 +1600,7 @@ void rend_SetPixel(ddgr_color color, int x, int y) {
{},
{}
};
gRenderer->setVertexData(0, 1, &vtx);
dglDrawArrays(GL_POINTS, 0, 1);
dglDrawArrays(GL_POINTS, gRenderer->addVertexData(&vtx, &vtx + 1), 1);
}
// Sets a pixel on the display
@ -1645,8 +1646,7 @@ void rend_DrawLine(int x1, int y1, int x2, int y2) {
}
};
gRenderer->setVertexData(0, vertices.size(), vertices.data());
dglDrawArrays(GL_LINES, 0, vertices.size());
dglDrawArrays(GL_LINES, gRenderer->addVertexData(vertices.begin(), vertices.end()), vertices.size());
rend_SetAlphaType(atype);
rend_SetLighting(ltype);
@ -1726,8 +1726,7 @@ void rend_DrawSpecialLine(g3Point *p0, g3Point *p1) {
{}};
});
gRenderer->setVertexData(0, vertices.size(), vertices.data());
dglDrawArrays(GL_LINES, 0, vertices.size());
dglDrawArrays(GL_LINES, gRenderer->addVertexData(vertices.begin(), vertices.end()), vertices.size());
}
// Takes a screenshot of the current frame and puts it into the handle passed

View File

@ -54,15 +54,16 @@ template <typename V>
struct VertexBuffer {
VertexBuffer(GLuint program,
std::vector<VertexAttrib<V>> attribs,
size_t vertex_count,
size_t vertexCount,
GLenum bufferType,
V const* initialData = nullptr)
: vao_{outval(dglGenVertexArrays)},
vbo_{outval(dglGenBuffers)} {
dglBindVertexArray(vao_);
dglBindBuffer(GL_ARRAY_BUFFER, vbo_);
bind();
dglBufferData(GL_ARRAY_BUFFER,
vertex_count * sizeof(PosColorUV2Vertex),
vertexCount * sizeof(V),
initialData,
bufferType);
@ -78,11 +79,16 @@ struct VertexBuffer {
}
}
void UpdateData(size_t vtx_offset, size_t vtx_count, V const* vertices) const {
dglBindBuffer(GL_ARRAY_BUFFER, vbo_);
void UpdateData(size_t vtx_offset, size_t vtx_count, V const* vertices) {
bind();
dglBufferSubData(GL_ARRAY_BUFFER, vtx_offset * sizeof(V), vtx_count * sizeof(V), vertices);
}
protected:
void bind() {
dglBindBuffer(GL_ARRAY_BUFFER, vbo_);
}
private:
static void DeleteBuffer(GLuint id) {
dglDeleteBuffers(1, &id);
@ -101,6 +107,41 @@ private:
MoveOnlyHolder<GLuint, DeleteBuffer> vbo_;
};
// Streaming vertex buffer: each batch of vertices is appended after the previous one, and
// once the next batch would no longer fit the buffer's storage is "orphaned" (re-specified
// with dglBufferData and a null data pointer) and writing restarts at index 0.
// https://www.khronos.org/opengl/wiki/Buffer_Object_Streaming#Buffer_update
template <typename V>
struct OrphaningVertexBuffer : VertexBuffer<V> {
  OrphaningVertexBuffer(GLuint program, std::vector<VertexAttrib<V>> attribs)
      : VertexBuffer<V>{program, std::move(attribs), kVertexCount, kBufferType} {}

  // Copies the vertices in [begin, end) into the buffer.
  //
  // Returns the index (in vertices, not bytes) of the first vertex written, for use as the
  // first-vertex argument of a subsequent draw call. For an empty range nothing is uploaded
  // and the current write position is returned.
  //
  // Throws std::runtime_error if the range holds more than kVertexCount vertices or if the
  // buffer cannot be mapped.
  template <typename VertexIter,
            typename = std::enable_if_t<std::is_convertible_v<typename std::iterator_traits<VertexIter>::value_type, V>>>
  size_t AddVertexData(VertexIter begin, VertexIter end) {
    this->bind();

    auto const dist = std::distance(begin, end);
    if (dist <= 0) {
      // A zero-length mapping is rejected by GL, and there is nothing to copy anyway.
      return nextVertex_;
    }
    auto const count = static_cast<size_t>(dist);
    if (count > kVertexCount) {
      // Even a freshly orphaned buffer could not hold this batch.
      throw std::runtime_error("vertex batch exceeds streaming vertex buffer capacity");
    }

    // nextVertex_ never exceeds kVertexCount, so the subtraction cannot wrap. A batch that
    // exactly fills the remaining space is still written in place.
    if (count > kVertexCount - nextVertex_) {
      // Orphan: request fresh storage of the same size and start over at the front.
      dglBufferData(GL_ARRAY_BUFFER,
                    kVertexCount * sizeof(V),
                    nullptr,
                    kBufferType);
      nextVertex_ = 0;
    }

    auto const start = nextVertex_;
    // The mapped range was never written since the last orphaning, so the write is
    // requested without synchronization.
    auto* const mapped = static_cast<V*>(dglMapBufferRange(GL_ARRAY_BUFFER,
                                                           start * sizeof(V),
                                                           count * sizeof(V),
                                                           GL_MAP_WRITE_BIT | GL_MAP_UNSYNCHRONIZED_BIT));
    if (mapped == nullptr) {
      throw std::runtime_error("error mapping streaming vertex buffer");
    }
    std::copy(begin, end, mapped);
    dglUnmapBuffer(GL_ARRAY_BUFFER);

    nextVertex_ += count;
    return start;
  }

private:
  // Capacity of the buffer, in vertices.
  static constexpr size_t kVertexCount{1 << 16};
  // Usage hint passed to dglBufferData on creation and on every orphaning.
  static constexpr GLenum kBufferType{GL_STREAM_DRAW};
  // Index of the next unwritten vertex slot in the current storage.
  size_t nextVertex_{};
};
template <GLenum kType>
struct Shader {
static_assert(kType == GL_VERTEX_SHADER || kType == GL_FRAGMENT_SHADER);
@ -141,7 +182,7 @@ private:
template <typename V>
struct ShaderProgram {
explicit ShaderProgram(std::string_view vertexSrc, std::string_view fragmentSrc, std::vector<VertexAttrib<V>> attribs)
: id_{dglCreateProgram()}, vertex_{vertexSrc}, fragment_{fragmentSrc}, vbo_{id_, std::move(attribs), MAX_POINTS_IN_POLY, GL_DYNAMIC_DRAW} {
: id_{dglCreateProgram()}, vertex_{vertexSrc}, fragment_{fragmentSrc}, vbo_{id_, std::move(attribs)} {
if (id_ == 0) {
throw std::runtime_error("error creating GL program");
}
@ -170,8 +211,10 @@ struct ShaderProgram {
dglUseProgram(0);
}
void setVertexData(size_t offset, size_t count, PosColorUV2Vertex const* vertices) {
vbo_.UpdateData(offset, count, vertices);
// Appends the vertices in [begin, end) to this program's vertex buffer and returns the
// value reported by the buffer's AddVertexData (the index of the first vertex written).
//
// Only participates in overload resolution when the iterator's value type is convertible
// to V; enable_if_t (rather than bare enable_if) is what makes that constraint take effect.
template <typename VertexIter,
          typename = std::enable_if_t<std::is_convertible_v<typename std::iterator_traits<VertexIter>::value_type, V>>>
size_t addVertexData(VertexIter begin, VertexIter end) {
  return vbo_.AddVertexData(begin, end);
}
void setUniformMat4f(std::string const& name, glm::mat4x4 const& matrix) {
@ -212,6 +255,6 @@ private:
MoveOnlyHolder<GLuint, DeleteProgram> id_;
Shader<GL_VERTEX_SHADER> vertex_;
Shader<GL_FRAGMENT_SHADER> fragment_;
VertexBuffer<V> vbo_;
OrphaningVertexBuffer<V> vbo_;
std::unordered_map<std::string, GLint> uniform_cache_;
};

View File

@ -169,6 +169,7 @@ DYNAEXTERN(glGetString);
DYNAEXTERN(glGetStringi);
DYNAEXTERN(glGetUniformLocation);
DYNAEXTERN(glLinkProgram);
DYNAEXTERN(glMapBufferRange);
DYNAEXTERN(glPixelStorei);
DYNAEXTERN(glPolygonOffset);
DYNAEXTERN(glReadPixels);
@ -181,6 +182,7 @@ DYNAEXTERN(glUniform1f);
DYNAEXTERN(glUniform1i);
DYNAEXTERN(glUniform4f);
DYNAEXTERN(glUniformMatrix4fv);
DYNAEXTERN(glUnmapBuffer);
DYNAEXTERN(glUseProgram);
DYNAEXTERN(glVertexAttribPointer);
DYNAEXTERN(glViewport);