Skip to content

Commit e858cb1

Browse files
null77Commit Bot
authored and
Commit Bot
committed
Split VAO dirty bits to speed iteration.
Using > 64 bits (we had over 90) would use a much slower dirty bit iteration. Speed this up by splitting the dirty bits into two levels. The top level only has a single dirty bit per attrib, per binding, and one bit for the element array buffer. The next level has separate dirty bits for attribs and bindings. The D3D11 back-end doesn't actually care about individual dirty bits of attribs or bindings, since it resets entire attributes at a time, but the GL back-end only refreshes the necessary info. Improves the score of a simple state change microbenchmark by 15% on the D3D11 and GL back-ends with a no-op driver. Real-world impact will be smaller. Also includes a test suppression for an NVIDIA bug that surfaced when we changed the order in which GL commands were sent to the driver. BUG=angleproject:2389 Change-Id: If8d5e5eb0b27e2a77e20535e33626183d372d311 Reviewed-on: https://chromium-review.googlesource.com/556799 Reviewed-by: Geoff Lang <[email protected]> Reviewed-by: Yuly Novikov <[email protected]> Commit-Queue: Jamie Madill <[email protected]>
1 parent a0ccea1 commit e858cb1

12 files changed

+192
-102
lines changed

src/libANGLE/VertexArray.cpp

Lines changed: 28 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -111,7 +111,19 @@ size_t VertexArray::GetVertexIndexFromDirtyBit(size_t dirtyBit)
111111
static_assert(gl::MAX_VERTEX_ATTRIBS == gl::MAX_VERTEX_ATTRIB_BINDINGS,
112112
"The stride of vertex attributes should equal to that of vertex bindings.");
113113
ASSERT(dirtyBit > DIRTY_BIT_ELEMENT_ARRAY_BUFFER);
114-
return (dirtyBit - DIRTY_BIT_ATTRIB_0_ENABLED) % gl::MAX_VERTEX_ATTRIBS;
114+
return (dirtyBit - DIRTY_BIT_ATTRIB_0) % gl::MAX_VERTEX_ATTRIBS;
115+
}
116+
117+
void VertexArray::setDirtyAttribBit(size_t attribIndex, DirtyAttribBitType dirtyAttribBit)
118+
{
119+
mDirtyBits.set(DIRTY_BIT_ATTRIB_0 + attribIndex);
120+
mDirtyAttribBits[attribIndex].set(dirtyAttribBit);
121+
}
122+
123+
void VertexArray::setDirtyBindingBit(size_t bindingIndex, DirtyBindingBitType dirtyBindingBit)
124+
{
125+
mDirtyBits.set(DIRTY_BIT_BINDING_0 + bindingIndex);
126+
mDirtyBindingBits[bindingIndex].set(dirtyBindingBit);
115127
}
116128

117129
void VertexArray::bindVertexBufferImpl(const Context *context,
@@ -137,8 +149,7 @@ void VertexArray::bindVertexBuffer(const Context *context,
137149
GLsizei stride)
138150
{
139151
bindVertexBufferImpl(context, bindingIndex, boundBuffer, offset, stride);
140-
141-
mDirtyBits.set(DIRTY_BIT_BINDING_0_BUFFER + bindingIndex);
152+
setDirtyBindingBit(bindingIndex, DIRTY_BINDING_BUFFER);
142153
}
143154

144155
void VertexArray::setVertexAttribBinding(const Context *context,
@@ -153,17 +164,17 @@ void VertexArray::setVertexAttribBinding(const Context *context,
153164
ASSERT(context->getClientVersion() >= ES_3_1);
154165
mState.mVertexAttributes[attribIndex].bindingIndex = bindingIndex;
155166

156-
mDirtyBits.set(DIRTY_BIT_ATTRIB_0_BINDING + attribIndex);
167+
setDirtyAttribBit(attribIndex, DIRTY_ATTRIB_BINDING);
157168
}
169+
mState.mVertexAttributes[attribIndex].bindingIndex = static_cast<GLuint>(bindingIndex);
158170
}
159171

160172
void VertexArray::setVertexBindingDivisor(size_t bindingIndex, GLuint divisor)
161173
{
162174
ASSERT(bindingIndex < getMaxBindings());
163175

164176
mState.mVertexBindings[bindingIndex].setDivisor(divisor);
165-
166-
mDirtyBits.set(DIRTY_BIT_BINDING_0_DIVISOR + bindingIndex);
177+
setDirtyBindingBit(bindingIndex, DIRTY_BINDING_DIVISOR);
167178
}
168179

169180
void VertexArray::setVertexAttribFormatImpl(size_t attribIndex,
@@ -194,8 +205,7 @@ void VertexArray::setVertexAttribFormat(size_t attribIndex,
194205
GLuint relativeOffset)
195206
{
196207
setVertexAttribFormatImpl(attribIndex, size, type, normalized, pureInteger, relativeOffset);
197-
198-
mDirtyBits.set(DIRTY_BIT_ATTRIB_0_FORMAT + attribIndex);
208+
setDirtyAttribBit(attribIndex, DIRTY_ATTRIB_FORMAT);
199209
}
200210

201211
void VertexArray::setVertexAttribDivisor(const Context *context, size_t attribIndex, GLuint divisor)
@@ -214,7 +224,7 @@ void VertexArray::enableAttribute(size_t attribIndex, bool enabledState)
214224
mState.mVertexAttributesTypeMask.setIndex(
215225
GetVertexAttributeBaseType(mState.mVertexAttributes[attribIndex]), attribIndex);
216226

217-
mDirtyBits.set(DIRTY_BIT_ATTRIB_0_ENABLED + attribIndex);
227+
setDirtyAttribBit(attribIndex, DIRTY_ATTRIB_ENABLED);
218228

219229
// Update state cache
220230
mState.mEnabledAttributesMask.set(attribIndex, enabledState);
@@ -246,7 +256,7 @@ void VertexArray::setVertexAttribPointer(const Context *context,
246256

247257
bindVertexBufferImpl(context, attribIndex, boundBuffer, offset, effectiveStride);
248258

249-
mDirtyBits.set(DIRTY_BIT_ATTRIB_0_POINTER + attribIndex);
259+
setDirtyAttribBit(attribIndex, DIRTY_ATTRIB_POINTER);
250260
}
251261

252262
void VertexArray::setElementArrayBuffer(const Context *context, Buffer *buffer)
@@ -264,8 +274,15 @@ void VertexArray::syncState(const Context *context)
264274
{
265275
if (mDirtyBits.any())
266276
{
267-
mVertexArray->syncState(context, mDirtyBits);
277+
mVertexArray->syncState(context, mDirtyBits, mDirtyAttribBits, mDirtyBindingBits);
268278
mDirtyBits.reset();
279+
280+
// This is a bit of an implementation hack - but since we know the implementation
281+
// details of the dirty bit class it should always have the same effect as iterating
282+
// individual attribs. We could also look into schemes where iterating the dirty
283+
// bit set also resets it as you pass through it.
284+
memset(&mDirtyAttribBits, 0, sizeof(mDirtyAttribBits));
285+
memset(&mDirtyBindingBits, 0, sizeof(mDirtyBindingBits));
269286
}
270287
}
271288

src/libANGLE/VertexArray.h

Lines changed: 41 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -153,40 +153,54 @@ class VertexArray final : public LabeledObject
153153
return mState.getEnabledAttributesMask();
154154
}
155155

156+
// Dirty bits for VertexArrays use a hierarchical design. At the top level, each attribute
157+
// has a single dirty bit. Then an array of MAX_ATTRIBS dirty bits each has a dirty bit for
158+
// enabled/pointer/format/binding. Bindings are handled similarly. Note that because the
159+
// total number of dirty bits is 33, it will not be as fast on a 32-bit machine, which
160+
// can't support the advanced 64-bit scanning intrinsics. We could consider packing the
161+
// binding and attribute bits together if this becomes a problem.
156162
enum DirtyBitType
157163
{
158164
DIRTY_BIT_ELEMENT_ARRAY_BUFFER,
159165

160-
// Reserve bits for enabled flags
161-
DIRTY_BIT_ATTRIB_0_ENABLED,
162-
DIRTY_BIT_ATTRIB_MAX_ENABLED = DIRTY_BIT_ATTRIB_0_ENABLED + gl::MAX_VERTEX_ATTRIBS,
166+
// Dirty bits for attributes.
167+
DIRTY_BIT_ATTRIB_0,
168+
DIRTY_BIT_ATTRIB_MAX = DIRTY_BIT_ATTRIB_0 + gl::MAX_VERTEX_ATTRIBS,
163169

164-
// Reserve bits for attrib pointers
165-
DIRTY_BIT_ATTRIB_0_POINTER = DIRTY_BIT_ATTRIB_MAX_ENABLED,
166-
DIRTY_BIT_ATTRIB_MAX_POINTER = DIRTY_BIT_ATTRIB_0_POINTER + gl::MAX_VERTEX_ATTRIBS,
170+
// Dirty bits for bindings.
171+
DIRTY_BIT_BINDING_0 = DIRTY_BIT_ATTRIB_MAX,
172+
DIRTY_BIT_BINDING_MAX = DIRTY_BIT_BINDING_0 + gl::MAX_VERTEX_ATTRIB_BINDINGS,
167173

168-
// Reserve bits for changes to VertexAttribFormat
169-
DIRTY_BIT_ATTRIB_0_FORMAT = DIRTY_BIT_ATTRIB_MAX_POINTER,
170-
DIRTY_BIT_ATTRIB_MAX_FORMAT = DIRTY_BIT_ATTRIB_0_FORMAT + gl::MAX_VERTEX_ATTRIBS,
171-
172-
// Reserve bits for changes to VertexAttribBinding
173-
DIRTY_BIT_ATTRIB_0_BINDING = DIRTY_BIT_ATTRIB_MAX_FORMAT,
174-
DIRTY_BIT_ATTRIB_MAX_BINDING = DIRTY_BIT_ATTRIB_0_BINDING + gl::MAX_VERTEX_ATTRIBS,
174+
DIRTY_BIT_UNKNOWN = DIRTY_BIT_BINDING_MAX,
175+
DIRTY_BIT_MAX = DIRTY_BIT_UNKNOWN,
176+
};
175177

176-
// Reserve bits for changes to BindVertexBuffer
177-
DIRTY_BIT_BINDING_0_BUFFER = DIRTY_BIT_ATTRIB_MAX_BINDING,
178-
DIRTY_BIT_BINDING_MAX_BUFFER = DIRTY_BIT_BINDING_0_BUFFER + gl::MAX_VERTEX_ATTRIB_BINDINGS,
178+
// We want to keep the number of dirty bits within 64 to keep iteration times fast.
179+
static_assert(DIRTY_BIT_MAX <= 64, "Too many vertex array dirty bits.");
179180

180-
// Reserve bits for binding divisors
181-
DIRTY_BIT_BINDING_0_DIVISOR = DIRTY_BIT_BINDING_MAX_BUFFER,
182-
DIRTY_BIT_BINDING_MAX_DIVISOR =
183-
DIRTY_BIT_BINDING_0_DIVISOR + gl::MAX_VERTEX_ATTRIB_BINDINGS,
181+
enum DirtyAttribBitType
182+
{
183+
DIRTY_ATTRIB_ENABLED,
184+
DIRTY_ATTRIB_POINTER,
185+
DIRTY_ATTRIB_FORMAT,
186+
DIRTY_ATTRIB_BINDING,
187+
DIRTY_ATTRIB_UNKNOWN,
188+
DIRTY_ATTRIB_MAX = DIRTY_ATTRIB_UNKNOWN,
189+
};
184190

185-
DIRTY_BIT_UNKNOWN = DIRTY_BIT_BINDING_MAX_DIVISOR,
186-
DIRTY_BIT_MAX = DIRTY_BIT_UNKNOWN,
191+
enum DirtyBindingBitType
192+
{
193+
DIRTY_BINDING_BUFFER,
194+
DIRTY_BINDING_DIVISOR,
195+
DIRTY_BINDING_UNKNOWN,
196+
DIRTY_BINDING_MAX = DIRTY_BINDING_UNKNOWN,
187197
};
188198

189199
using DirtyBits = angle::BitSet<DIRTY_BIT_MAX>;
200+
using DirtyAttribBits = angle::BitSet<DIRTY_ATTRIB_MAX>;
201+
using DirtyBindingBits = angle::BitSet<DIRTY_BINDING_MAX>;
202+
using DirtyAttribBitsArray = std::array<DirtyAttribBits, gl::MAX_VERTEX_ATTRIBS>;
203+
using DirtyBindingBitsArray = std::array<DirtyBindingBits, gl::MAX_VERTEX_ATTRIB_BINDINGS>;
190204

191205
static size_t GetVertexIndexFromDirtyBit(size_t dirtyBit);
192206

@@ -201,10 +215,15 @@ class VertexArray final : public LabeledObject
201215
private:
202216
~VertexArray() override;
203217

218+
void setDirtyAttribBit(size_t attribIndex, DirtyAttribBitType dirtyAttribBit);
219+
void setDirtyBindingBit(size_t bindingIndex, DirtyBindingBitType dirtyBindingBit);
220+
204221
GLuint mId;
205222

206223
VertexArrayState mState;
207224
DirtyBits mDirtyBits;
225+
DirtyAttribBitsArray mDirtyAttribBits;
226+
DirtyBindingBitsArray mDirtyBindingBits;
208227

209228
rx::VertexArrayImpl *mVertexArray;
210229
};

src/libANGLE/VertexArray_unittest.cpp

Lines changed: 5 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ using namespace gl;
1919
TEST(VertexArrayTest, VerifyGetIndexFromDirtyBit)
2020
{
2121
VertexArray::DirtyBits dirtyBits;
22-
constexpr size_t bits[] = {1, 4, 9, 16, 25, 36, 49, 64, 81, 92};
22+
constexpr size_t bits[] = {1, 4, 9, 16, 25};
2323
constexpr GLint count = sizeof(bits) / sizeof(size_t);
2424
for (GLint i = 0; i < count; i++)
2525
{
@@ -29,29 +29,13 @@ TEST(VertexArrayTest, VerifyGetIndexFromDirtyBit)
2929
for (size_t dirtyBit : dirtyBits)
3030
{
3131
const size_t index = VertexArray::GetVertexIndexFromDirtyBit(dirtyBit);
32-
if (dirtyBit < VertexArray::DIRTY_BIT_ATTRIB_MAX_ENABLED)
32+
if (dirtyBit < VertexArray::DIRTY_BIT_ATTRIB_MAX)
3333
{
34-
EXPECT_EQ(dirtyBit - VertexArray::DIRTY_BIT_ATTRIB_0_ENABLED, index);
34+
EXPECT_EQ(dirtyBit - VertexArray::DIRTY_BIT_ATTRIB_0, index);
3535
}
36-
else if (dirtyBit < VertexArray::DIRTY_BIT_ATTRIB_MAX_POINTER)
36+
else if (dirtyBit < VertexArray::DIRTY_BIT_BINDING_MAX)
3737
{
38-
EXPECT_EQ(dirtyBit - VertexArray::DIRTY_BIT_ATTRIB_0_POINTER, index);
39-
}
40-
else if (dirtyBit < VertexArray::DIRTY_BIT_ATTRIB_MAX_FORMAT)
41-
{
42-
EXPECT_EQ(dirtyBit - VertexArray::DIRTY_BIT_ATTRIB_0_FORMAT, index);
43-
}
44-
else if (dirtyBit < VertexArray::DIRTY_BIT_ATTRIB_MAX_BINDING)
45-
{
46-
EXPECT_EQ(dirtyBit - VertexArray::DIRTY_BIT_ATTRIB_0_BINDING, index);
47-
}
48-
else if (dirtyBit < VertexArray::DIRTY_BIT_BINDING_MAX_BUFFER)
49-
{
50-
EXPECT_EQ(dirtyBit - VertexArray::DIRTY_BIT_BINDING_0_BUFFER, index);
51-
}
52-
else if (dirtyBit < VertexArray::DIRTY_BIT_BINDING_MAX_DIVISOR)
53-
{
54-
EXPECT_EQ(dirtyBit - VertexArray::DIRTY_BIT_BINDING_0_DIVISOR, index);
38+
EXPECT_EQ(dirtyBit - VertexArray::DIRTY_BIT_BINDING_0, index);
5539
}
5640
else
5741
ASSERT_TRUE(false);

src/libANGLE/renderer/VertexArrayImpl.h

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,10 @@ class VertexArrayImpl : angle::NonCopyable
2121
{
2222
public:
2323
VertexArrayImpl(const gl::VertexArrayState &state) : mState(state) {}
24-
virtual void syncState(const gl::Context *context, const gl::VertexArray::DirtyBits &dirtyBits)
24+
virtual void syncState(const gl::Context *context,
25+
const gl::VertexArray::DirtyBits &dirtyBits,
26+
const gl::VertexArray::DirtyAttribBitsArray &attribBits,
27+
const gl::VertexArray::DirtyBindingBitsArray &bindingBits)
2528
{
2629
}
2730

@@ -32,6 +35,6 @@ class VertexArrayImpl : angle::NonCopyable
3235
const gl::VertexArrayState &mState;
3336
};
3437

35-
}
38+
} // namespace rx
3639

3740
#endif // LIBANGLE_RENDERER_VERTEXARRAYIMPL_H_

src/libANGLE/renderer/d3d/d3d11/VertexArray11.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,9 @@ void VertexArray11::destroy(const gl::Context *context)
5757
}
5858

5959
void VertexArray11::syncState(const gl::Context *context,
60-
const gl::VertexArray::DirtyBits &dirtyBits)
60+
const gl::VertexArray::DirtyBits &dirtyBits,
61+
const gl::VertexArray::DirtyAttribBitsArray &attribBits,
62+
const gl::VertexArray::DirtyBindingBitsArray &bindingBits)
6163
{
6264
ASSERT(dirtyBits.any());
6365

src/libANGLE/renderer/d3d/d3d11/VertexArray11.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,9 @@ class VertexArray11 : public angle::ObserverInterface, public VertexArrayImpl
2727
void destroy(const gl::Context *context) override;
2828

2929
void syncState(const gl::Context *context,
30-
const gl::VertexArray::DirtyBits &dirtyBits) override;
30+
const gl::VertexArray::DirtyBits &dirtyBits,
31+
const gl::VertexArray::DirtyAttribBitsArray &attribBits,
32+
const gl::VertexArray::DirtyBindingBitsArray &bindingBits) override;
3133
// This will flush any pending attrib updates and then check the dynamic attribs mask.
3234
bool hasActiveDynamicAttrib(const gl::Context *context);
3335
gl::Error updateDirtyAndDynamicAttribs(const gl::Context *context,

src/libANGLE/renderer/d3d/d3d9/VertexArray9.h

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,9 @@ class VertexArray9 : public VertexArrayImpl
2424
VertexArray9(const gl::VertexArrayState &data) : VertexArrayImpl(data) {}
2525

2626
void syncState(const gl::Context *context,
27-
const gl::VertexArray::DirtyBits &dirtyBits) override;
27+
const gl::VertexArray::DirtyBits &dirtyBits,
28+
const gl::VertexArray::DirtyAttribBitsArray &attribBits,
29+
const gl::VertexArray::DirtyBindingBitsArray &bindingBits) override;
2830

2931
~VertexArray9() override {}
3032

@@ -35,12 +37,14 @@ class VertexArray9 : public VertexArrayImpl
3537
};
3638

3739
inline void VertexArray9::syncState(const gl::Context *context,
38-
const gl::VertexArray::DirtyBits &dirtyBits)
40+
const gl::VertexArray::DirtyBits &dirtyBits,
41+
const gl::VertexArray::DirtyAttribBitsArray &attribBits,
42+
const gl::VertexArray::DirtyBindingBitsArray &bindingBits)
3943
{
4044
ASSERT(dirtyBits.any());
4145
Renderer9 *renderer = GetImplAs<Context9>(context)->getRenderer();
4246
mCurrentStateSerial = renderer->generateSerial();
4347
}
44-
}
48+
} // namespace rx
4549

4650
#endif // LIBANGLE_RENDERER_D3D_D3D9_VERTEXARRAY9_H_

0 commit comments

Comments
 (0)