Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 5 additions & 29 deletions indra/llimage/llimagej2c.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -268,35 +268,11 @@ S32 LLImageJ2C::calcHeaderSizeJ2C()
//static
S32 LLImageJ2C::calcDataSizeJ2C(S32 w, S32 h, S32 comp, S32 discard_level, F32 rate)
{
// Note: This provides an estimation for the first to last quality layer of a given discard level
// This is however an efficient approximation, as the true discard level boundary would be
// in general too big for fast fetching.
// For details about the equation used here, see https://wiki.lindenlab.com/wiki/THX1138_KDU_Improvements#Byte_Range_Study

// Estimate the number of layers. This is consistent with what's done for j2c encoding in LLImageJ2CKDU::encodeImpl().
constexpr S32 precision = 8; // assumed bitrate per component channel, might change in future for HDR support
constexpr S32 max_components = 4; // assumed the file has four components; three color and alpha
// Use MAX_IMAGE_SIZE_DEFAULT (currently 2048) if either dimension is unknown (zero)
S32 width = (w > 0) ? w : 2048;
S32 height = (h > 0) ? h : 2048;
S32 max_dimension = llmax(width, height); // Find largest dimension
S32 block_area = MAX_BLOCK_SIZE * MAX_BLOCK_SIZE; // Calculated initial block area from established max block size (currently 64)
S32 max_layers = (S32)llmax(llround(log2f((float)max_dimension) - log2f((float)MAX_BLOCK_SIZE)), 4); // Find number of powers of two between extents and block size to a minimum of 4
block_area *= llmax(max_layers, 1); // Adjust initial block area by max number of layers
S32 totalbytes = (S32) (MIN_LAYER_SIZE * max_components * precision); // Start estimation with a minimum reasonable size
S32 block_layers = 0;
while (block_layers <= max_layers) // Walk the layers
{
if (block_layers <= (5 - discard_level)) // Walk backwards from discard 5 to required discard layer.
totalbytes += (S32) (block_area * max_components * precision * rate); // Add each block layer reduced by assumed compression rate
block_layers++; // Move to next layer
block_area *= 4; // Increase block area by power of four
}

totalbytes /= 8; // to bytes
totalbytes += calcHeaderSizeJ2C(); // header

return totalbytes;
// Dispatch to the linked impl so OpenJPEG (block-aligned, needs
// over-allocation) and KDU (packet-aligned, lean) each return what
// their decoder actually needs.
static std::unique_ptr<LLImageJ2CImpl> s_estimator(fallbackCreateLLImageJ2CImpl());
return s_estimator->estimateDataSize(w, h, comp, discard_level, rate);
}

S32 LLImageJ2C::calcHeaderSize()
Expand Down
5 changes: 5 additions & 0 deletions indra/llimage/llimagej2c.h
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,11 @@ class LLImageJ2CImpl
{
public:
virtual ~LLImageJ2CImpl();

// Estimate the byte size of a J2C codestream sufficient to decode the
// given discard level. KDU uses a packet-by-packet impl; OpenJPEG
// overrides with a more conservative block-aligned estimate.
virtual S32 estimateDataSize(S32 w, S32 h, S32 comp, S32 discard_level, F32 rate) const = 0;
protected:
// Find out the image size and number of channels.
// Return value:
Expand Down
29 changes: 29 additions & 0 deletions indra/llimagej2coj/llimagej2coj.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -919,3 +919,32 @@ bool LLImageJ2COJ::getMetadata(LLImageJ2C &base)
base.setSize(width, height, components);
return true;
}


// OpenJPEG byte-size estimate for fetching a J2C codestream up to a discard level.
//
// Sums a geometric series of "block layer" areas (each 4x the previous) for
// every layer index up to (5 - discard_level), scales the sum by an assumed
// 4 components x 8 bits and by the compression rate, converts bits to bytes
// and adds the J2C header size.
//
// w, h           image dimensions; a non-positive value is replaced by 2048.
// comp           not used here: the estimate always assumes four components.
// discard_level  requested discard level; larger values sum fewer layers.
// rate           compression rate applied to every layer term.
//
// Returns the estimated byte count, header included.
//
// NOTE(review): the original author's note says this deliberately
// over-estimates because OpenJPEG decodes whole code-blocks; that rationale
// cannot be confirmed from this function alone.
S32 LLImageJ2COJ::estimateDataSize(S32 w, S32 h, S32 comp, S32 discard_level, F32 rate) const
{
    constexpr S32 precision = 8;       // assumed bits per component channel
    constexpr S32 max_components = 4;  // assumed channel count

    // Substitute 2048 for any unknown (non-positive) dimension.
    const S32 width = (w > 0) ? w : 2048;
    const S32 height = (h > 0) ? h : 2048;
    const S32 max_dimension = llmax(width, height);

    // Number of powers of two between the largest extent and the block size,
    // never fewer than 4.
    const S32 max_layers = (S32)llmax(llround(log2f((float)max_dimension) - log2f((float)MAX_BLOCK_SIZE)), 4);

    // Starting area: one max-size block scaled by the layer count
    // (max_layers is already >= 4, so no further lower bound is needed).
    S32 layer_area = MAX_BLOCK_SIZE * MAX_BLOCK_SIZE;
    layer_area *= max_layers;

    // Running total in bits, seeded with a minimum layer size.
    S32 total = (S32)(MIN_LAYER_SIZE * max_components * precision);

    // Only layers with index <= (5 - discard_level) contribute; the area
    // quadruples from one layer to the next.
    const S32 last_needed_layer = 5 - discard_level;
    for (S32 layer = 0; layer <= max_layers && layer <= last_needed_layer; ++layer)
    {
        total += (S32)(layer_area * max_components * precision * rate);
        layer_area *= 4;
    }

    total /= 8;                                // bits -> bytes
    total += LLImageJ2C::calcHeaderSizeJ2C();  // header
    return total;
}
19 changes: 12 additions & 7 deletions indra/llimagej2coj/llimagej2coj.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,15 +35,20 @@ class LLImageJ2COJ : public LLImageJ2CImpl
{
public:
LLImageJ2COJ();
virtual ~LLImageJ2COJ();
virtual ~LLImageJ2COJ() override;
protected:
virtual bool getMetadata(LLImageJ2C &base);
virtual bool decodeImpl(LLImageJ2C &base, LLImageRaw &raw_image, F32 decode_time, S32 first_channel, S32 max_channel_count);
virtual bool getMetadata(LLImageJ2C &base) override;
virtual bool decodeImpl(LLImageJ2C &base, LLImageRaw &raw_image, F32 decode_time, S32 first_channel, S32 max_channel_count) override;
virtual bool encodeImpl(LLImageJ2C &base, const LLImageRaw &raw_image, const char* comment_text, F32 encode_time=0.0,
bool reversible = false);
virtual bool initDecode(LLImageJ2C &base, LLImageRaw &raw_image, int discard_level = -1, int* region = NULL);
virtual bool initEncode(LLImageJ2C &base, LLImageRaw &raw_image, int blocks_size = -1, int precincts_size = -1, int levels = 0);
virtual std::string getEngineInfo() const;
bool reversible = false) override;
virtual bool initDecode(LLImageJ2C &base, LLImageRaw &raw_image, int discard_level = -1, int* region = NULL) override;
virtual bool initEncode(LLImageJ2C &base, LLImageRaw &raw_image, int blocks_size = -1, int precincts_size = -1, int levels = 0) override;
virtual std::string getEngineInfo() const override;
public:
// OpenJPEG decodes whole code-blocks even with strict mode off, so the
// lean packet-walk under-allocates and clips quality. Keep the older
// conservative pyramid-with-multiplier estimate here.
virtual S32 estimateDataSize(S32 w, S32 h, S32 comp, S32 discard_level, F32 rate) const override;
};

#endif
30 changes: 30 additions & 0 deletions indra/llkdu/llimagej2ckdu.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1513,3 +1513,33 @@ void kdc_flow_control::process_components()
}
}
}

// Layer-factored byte estimator for a J2C codestream at a given discard level.
//
// Counts how many times the MAX_BLOCK_SIZE^2 block area must be quadrupled to
// cover the image surface (capped at 6), derives a layer factor from that
// count, then computes two candidate sizes for the discarded dimensions:
//   - "new": sqrt(width * height) * comp * rate * 1000 / layer_factor
//   - "old": width * height * comp * rate
// The "new" value is used only when LLImage::useNewByteRange() is set and it
// is the smaller of the two. The result is never below the J2C header size.
// Reference: https://wiki.lindenlab.com/wiki/THX1138_KDU_Improvements#Byte_Range_Study
//
// w, h           image dimensions; a non-positive value is replaced by 2048.
// comp           number of components.
// discard_level  number of halvings applied to each dimension; values outside
//                [0, 30] are clamped so the shift below stays well defined.
// rate           compression rate.
//
// Returns the estimated byte count.
S32 LLImageJ2CKDU::estimateDataSize(S32 w, S32 h, S32 comp, S32 discard_level, F32 rate) const
{
    S32 width = (w > 0) ? w : 2048;
    S32 height = (h > 0) ? h : 2048;

    // Only counts up to 6 matter for the layer factor, so stop there; this
    // also keeps `s` from overflowing on very large surfaces.
    const S32 max_counted_layers = 6;
    S32 nb_layers = 1;
    const S32 surface = width * height;
    S32 s = MAX_BLOCK_SIZE * MAX_BLOCK_SIZE;
    while (surface > s && nb_layers < max_counted_layers)
    {
        nb_layers++;
        s *= 4;
    }
    // nb_layers is in [1, 6] here, so the factor is in [3, 18] and never zero.
    F32 layer_factor = 3.0f * (7 - nb_layers);

    // Shifting by a negative count or by >= the bit width is undefined
    // behaviour; clamp before shifting. A negative level is treated as 0.
    const S32 shift = llclamp(discard_level, 0, 30);
    width >>= shift;
    height >>= shift;
    width = llmax(width, 1);
    height = llmax(height, 1);

    S32 new_bytes = (S32)(sqrtf((F32)(width * height)) * (F32)comp * rate * 1000.f / layer_factor);
    S32 old_bytes = (S32)((F32)(width * height * comp) * rate);
    S32 bytes = (LLImage::useNewByteRange() && (new_bytes < old_bytes)) ? new_bytes : old_bytes;
    bytes = llmax(bytes, LLImageJ2C::calcHeaderSizeJ2C());
    return bytes;
}
2 changes: 2 additions & 0 deletions indra/llkdu/llimagej2ckdu.h
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,8 @@ class LLImageJ2CKDU : public LLImageJ2CImpl
virtual bool initDecode(LLImageJ2C &base, LLImageRaw &raw_image, int discard_level = -1, int* region = NULL);
virtual bool initEncode(LLImageJ2C &base, LLImageRaw &raw_image, int blocks_size = -1, int precincts_size = -1, int levels = 0);
virtual std::string getEngineInfo() const;
public:
virtual S32 estimateDataSize(S32 w, S32 h, S32 comp, S32 discard_level, F32 rate) const;

private:
bool initDecode(LLImageJ2C &base, LLImageRaw &raw_image, F32 decode_time, ECodeStreamMode mode, S32 first_channel, S32 max_channel_count, int discard_level = -1, int* region = NULL);
Expand Down
2 changes: 2 additions & 0 deletions indra/llkdu/tests/llimagej2ckdu_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,8 @@ void LLImageBase::setSize(S32 , S32 , S32 ) { }
bool LLImageBase::isBufferInvalid() const { return false; }

LLImageJ2CImpl::~LLImageJ2CImpl() { }
bool LLImage::sUseNewByteRange = false;
S32 LLImageJ2C::calcHeaderSizeJ2C() { return 0; }

LLImageFormatted::LLImageFormatted(S8 ) { }
LLImageFormatted::~LLImageFormatted() { }
Expand Down
96 changes: 81 additions & 15 deletions indra/llrender/llimagegl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -66,8 +66,8 @@ static LLMutex sTexMemMutex;
static std::unordered_map<U32, U64> sTextureAllocs;
static U64 sTextureBytes = 0;

// track a texture alloc on the currently bound texture.
// asserts that no currently tracked alloc exists
// Per-mip upload paths call this once per level; only free_tex_image
// removes a texture's accounting entirely.
void LLImageGLMemory::alloc_tex_image(U32 width, U32 height, U32 intformat, U32 count)
{
U32 texUnit = gGL.getCurrentTexUnitIndex();
Expand All @@ -80,15 +80,46 @@ void LLImageGLMemory::alloc_tex_image(U32 width, U32 height, U32 intformat, U32

sTexMemMutex.lock();

// it is a precondition that no existing allocation exists for this texture
llassert(sTextureAllocs.find(texName) == sTextureAllocs.end());

sTextureAllocs[texName] = size;
auto iter = sTextureAllocs.find(texName);
if (iter != sTextureAllocs.end())
{
iter->second += size;

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I take it something subtracts the old value? Where does it happen?

}
else
{
sTextureAllocs[texName] = size;
}
sTextureBytes += size;

sTexMemMutex.unlock();
}

// Adds the byte size of every mip level below the base level to the
// accounting entry of the currently bound texture. Intended to be called
// after glGenerateMipmap, when only the base level has been accounted.
//
// base_width, base_height  dimensions of the base (level 0) image.
// intformat                internal format passed to LLImageGL::dataFormatBytes.
//
// If the bound texture has no accounting entry, nothing is changed.
void LLImageGLMemory::account_extra_mip_bytes(U32 base_width, U32 base_height, U32 intformat)
{
    // Walk the chain: halve each dimension (never below 1) and add that
    // level's size until both dimensions have reached 1.
    U64 mip_bytes = 0;
    for (U32 mip_w = base_width, mip_h = base_height; mip_w > 1 || mip_h > 1; )
    {
        if (mip_w > 1)
        {
            mip_w >>= 1;
        }
        else
        {
            mip_w = 1;
        }

        if (mip_h > 1)
        {
            mip_h >>= 1;
        }
        else
        {
            mip_h = 1;
        }

        mip_bytes += LLImageGL::dataFormatBytes(intformat, mip_w, mip_h);
    }

    // Identify the texture bound on the current texture unit.
    const U32 unit_index = gGL.getCurrentTexUnitIndex();
    const U32 bound_name = gGL.getTexUnit(unit_index)->getCurrTexture();

    sTexMemMutex.lock();
    auto entry = sTextureAllocs.find(bound_name);
    const bool is_tracked = (entry != sTextureAllocs.end());
    if (is_tracked)
    {
        // Keep the per-texture entry and the global total in step.
        entry->second += mip_bytes;
        sTextureBytes += mip_bytes;
    }
    sTexMemMutex.unlock();
}

// track texture free on given texName
void LLImageGLMemory::free_tex_image(U32 texName)
{
Expand Down Expand Up @@ -684,7 +715,10 @@ void LLImageGL::dump()
//----------------------------------------------------------------------------
void LLImageGL::forceUpdateBindStats(void) const
{
mLastBindTime = sLastFrameTime;
// Intentionally a no-op: mLastBindTime is written only by real bind
// paths so the staleness signal reflects actual GPU use. Callers that
// still invoke this (avatar "keep alive" sites, deleted-texture
// fallback) no longer falsely refresh staleness.
}

bool LLImageGL::updateBindStats() const
Expand Down Expand Up @@ -838,7 +872,7 @@ bool LLImageGL::setImage(const U8* data_in, bool data_hasmips /* = false */, S32
mMipLevels = wpo2(llmax(w, h));

// use legacy mipmap generation mode (note: making this conditional can cause rendering issues)
// -- but making it not conditional triggers deprecation warnings when core profile is enabled
// - but making it not conditional triggers deprecation warnings when core profile is enabled
// (some rendering issues while core profile is enabled are acceptable at this point in time)
if (!LLRender::sGLCoreProfile)
{
Expand All @@ -864,6 +898,7 @@ bool LLImageGL::setImage(const U8* data_in, bool data_hasmips /* = false */, S32
{
LL_PROFILE_GPU_ZONE("generate mip map");
glGenerateMipmap(mTarget);
account_extra_mip_bytes(w, h, mFormatInternal);
}
stop_glerror();
}
Expand Down Expand Up @@ -1461,7 +1496,12 @@ void LLImageGL::setManualImage(U32 target, S32 miplevel, S32 intformat, S32 widt
LL_PROFILE_ZONE_NUM(width);
LL_PROFILE_ZONE_NUM(height);

free_cur_tex_image();
// Release prior accounting only on the base mip; per-mip iteration
// accumulates the rest via the additive alloc_tex_image.
if (miplevel == 0)
{
free_cur_tex_image();
}
const bool use_sub_image = should_stagger_image_set(compress);
if (!use_sub_image)
{
Expand Down Expand Up @@ -1613,7 +1653,6 @@ bool LLImageGL::createGLTexture(S32 discard_level, const LLImageRaw* imageraw, S
{
destroyGLTexture();
mCurrentDiscardLevel = discard_level;
mLastBindTime = sLastFrameTime;
mGLTextureCreated = false;
return true ;
}
Expand Down Expand Up @@ -1729,9 +1768,7 @@ bool LLImageGL::createGLTexture(S32 discard_level, const U8* data_in, bool data_


mTextureMemory = (S64Bytes)getMipBytes(mCurrentDiscardLevel);

// mark this as bound at this point, so we don't throw it out immediately
mLastBindTime = sLastFrameTime;
mGLCreateTime = sLastFrameTime;

checkActiveThread();
return true;
Expand Down Expand Up @@ -1858,7 +1895,7 @@ bool LLImageGL::readBackRaw(S32 discard_level, LLImageRaw* imageraw, bool compre
LLGLint is_compressed = 0;
if (compressed_ok)
{
glGetTexLevelParameteriv(mTarget, is_compressed, GL_TEXTURE_COMPRESSED, (GLint*)&is_compressed);
glGetTexLevelParameteriv(mTarget, gl_discard, GL_TEXTURE_COMPRESSED, (GLint*)&is_compressed);
}

//-----------------------------------------------------------------------------------------------
Expand Down Expand Up @@ -2039,6 +2076,28 @@ S32 LLImageGL::getWidth(S32 discard_level) const
return width;
}

// static
// Returns floor(log2(max(width, height))): the number of halvings needed to
// bring the larger dimension down to 1. Returns 0 when either dimension is
// non-positive.
//
// The larger dimension is used because it determines how many halvings are
// needed; e.g. 1024x512 yields 10, whereas using the smaller one would give 9.
S32 LLImageGL::dimDerivedMaxDiscard(S32 width, S32 height)
{
    if (width <= 0 || height <= 0)
    {
        return 0;
    }

    // Count halvings with integer shifts rather than floorf(log2f(...)), so
    // the result is exact for every positive S32 and does not depend on
    // floating-point rounding.
    S32 remaining = llmax(width, height);
    S32 levels = 0;
    while (remaining > 1)
    {
        remaining >>= 1;
        ++levels;
    }
    return levels;
}

// Records sLastFrameTime as this image's last bind time.
// The store is skipped when mLastBindTime already holds that value, so
// repeated calls with an unchanged sLastFrameTime do not rewrite the member.
void LLImageGL::stampBound() const
{
    const bool already_stamped = (mLastBindTime == sLastFrameTime);
    if (already_stamped)
    {
        return;
    }
    mLastBindTime = sLastFrameTime;
}

S64 LLImageGL::getBytes(S32 discard_level) const
{
if (discard_level < 0)
Expand Down Expand Up @@ -2468,7 +2527,12 @@ bool LLImageGL::scaleDown(S32 desired_discard)
return false;
}

desired_discard = llmin(desired_discard, mMaxDiscardLevel);
// GL pyramid reaches 1x1 regardless of codec levels;
// mMaxDiscardLevel is hardcapped at MAX_DISCARD_LEVEL.
S32 dim_max_discard = (mWidth > 0 && mHeight > 0)
? dimDerivedMaxDiscard(mWidth, mHeight)
: (S32)mMaxDiscardLevel;
desired_discard = llmin(desired_discard, dim_max_discard);

if (desired_discard <= mCurrentDiscardLevel)
{
Expand Down Expand Up @@ -2501,6 +2565,7 @@ bool LLImageGL::scaleDown(S32 desired_discard)
LL_PROFILE_ZONE_NAMED_CATEGORY_TEXTURE("scaleDown - glGenerateMipmap");
gGL.getTexUnit(0)->bind(this);
glGenerateMipmap(mTarget);
account_extra_mip_bytes(desired_width, desired_height, mFormatInternal);
gGL.getTexUnit(0)->unbind(LLTexUnit::TT_TEXTURE);
}
}
Expand Down Expand Up @@ -2546,6 +2611,7 @@ bool LLImageGL::scaleDown(S32 desired_discard)
{
LL_PROFILE_ZONE_NAMED_CATEGORY_TEXTURE("scaleDown - glGenerateMipmap");
glGenerateMipmap(mTarget);
account_extra_mip_bytes(desired_width, desired_height, mFormatInternal);
}

gGL.getTexUnit(0)->unbind(LLTexUnit::TT_TEXTURE);
Expand Down
17 changes: 16 additions & 1 deletion indra/llrender/llimagegl.h
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,11 @@ class LLWindow;
namespace LLImageGLMemory
{
void alloc_tex_image(U32 width, U32 height, U32 intformat, U32 count);

// Add mip 1..N bytes to existing accounting. Call after glGenerateMipmap
// when only the base mip was accounted; without this the bytes counter
// undercounts mipmap-generated textures by ~25%.
void account_extra_mip_bytes(U32 base_width, U32 base_height, U32 intformat);
void free_tex_image(U32 texName);
void free_tex_images(U32 count, const U32* texNames);
void free_cur_tex_image();
Expand Down Expand Up @@ -151,6 +156,15 @@ class LLImageGL : public LLRefCount
S32 getDiscardLevel() const { return mCurrentDiscardLevel; }
S32 getMaxDiscardLevel() const { return mMaxDiscardLevel; }

// floor(log2(max(w, h))) - deepest GL pyramid level (down to 1x1).
// Returns 0 for non-positive inputs.
static S32 dimDerivedMaxDiscard(S32 width, S32 height);

// Record the wall-clock bind time - every bind path that touches a
// streaming-managed texture must call this, or the staleness signal
// sees the texture as never-bound and ramps it toward eviction.
void stampBound() const;

// override the current discard level
// should only be used for local textures where you know exactly what you're doing
void setDiscardLevel(S32 level) { mCurrentDiscardLevel = level; }
Expand Down Expand Up @@ -224,7 +238,8 @@ class LLImageGL : public LLRefCount
public:
// Various GL/Rendering options
S64Bytes mTextureMemory;
mutable F32 mLastBindTime; // last time this was bound, by discard level
mutable F32 mLastBindTime = 0.f; // wall-clock time at last stampBound; drives streaming staleness
F32 mGLCreateTime = 0.f; // wall-clock time the GL texture was created; staleness fallback for never-bound textures

private:
U32 createPickMask(S32 pWidth, S32 pHeight);
Expand Down
Loading
Loading