From 3c0d3871d4ded188f9069b7cac2dd65616ce6aac Mon Sep 17 00:00:00 2001 From: "Earle F. Philhower, III" Date: Mon, 27 May 2019 14:41:49 -0700 Subject: [PATCH 1/6] Make SSO support embedded \0s like native String Supercedes #6027 Make SSO more generic by keeping track of its length explicitly, allowing for embedded \0s to exist in the String (just like the non-SSO ones). --- cores/esp8266/WString.cpp | 28 +++++++++++++++------------- cores/esp8266/WString.h | 32 +++++++++++++++++--------------- tests/host/core/test_string.cpp | 16 ++++++++++------ 3 files changed, 42 insertions(+), 34 deletions(-) diff --git a/cores/esp8266/WString.cpp b/cores/esp8266/WString.cpp index 69444696aa..e70bc2e5ee 100644 --- a/cores/esp8266/WString.cpp +++ b/cores/esp8266/WString.cpp @@ -136,7 +136,7 @@ inline void String::init(void) { } void String::invalidate(void) { - if(!sso() && wbuffer()) + if(!isSSO() && wbuffer()) free(wbuffer()); init(); } @@ -154,17 +154,19 @@ unsigned char String::reserve(unsigned int size) { unsigned char String::changeBuffer(unsigned int maxStrLen) { // Can we use SSO here to avoid allocation? - if (maxStrLen < sizeof(sso_buf)) { - if (sso() || !buffer()) { + if (maxStrLen < sizeof(sso.buff) - 1) { + if (isSSO() || !buffer()) { // Already using SSO, nothing to do setSSO(true); return 1; - } else { // if bufptr && !sso() - // Using bufptr, need to shrink into sso_buff - char temp[sizeof(sso_buf)]; + } else { // if bufptr && !isSSO() + // Using bufptr, need to shrink into sso.buff + char temp[sizeof(sso.buff)]; memcpy(temp, buffer(), maxStrLen); free(wbuffer()); + uint16_t oldLen = len(); setSSO(true); + setLen(oldLen); memcpy(wbuffer(), temp, maxStrLen); return 1; } @@ -176,12 +178,12 @@ unsigned char String::changeBuffer(unsigned int maxStrLen) { return false; } uint16_t oldLen = len(); - char *newbuffer = (char *) realloc(sso() ? nullptr : wbuffer(), newSize); - if(newbuffer) { + char *newbuffer = (char *) realloc(isSSO() ? nullptr : wbuffer(), newSize); + if (newbuffer) { size_t oldSize = capacity() + 1; // include NULL. - if (sso()) { + if (isSSO()) { // Copy the SSO buffer into allocated space - memcpy(newbuffer, sso_buf, sizeof(sso_buf)); + memcpy(newbuffer, sso.buff, sizeof(sso.buff)); } if (newSize > oldSize) { @@ -229,15 +231,15 @@ void String::move(String &rhs) { rhs.invalidate(); return; } else { - if (!sso()) { + if (!isSSO()) { free(wbuffer()); setBuffer(nullptr); } } } - if (rhs.sso()) { + if (rhs.isSSO()) { setSSO(true); - memmove(sso_buf, rhs.sso_buf, sizeof(sso_buf)); + memmove(sso.buff, rhs.sso.buff, sizeof(sso.buff)); } else { setSSO(false); setBuffer(rhs.wbuffer()); diff --git a/cores/esp8266/WString.h b/cores/esp8266/WString.h index 3192a9ee33..4039efb400 100644 --- a/cores/esp8266/WString.h +++ b/cores/esp8266/WString.h @@ -250,27 +250,29 @@ class String { uint16_t cap; uint16_t len; }; - - // SSO is handled by checking the last byte of sso_buff. - // When not in SSO mode, that byte is set to 0xff, while when in SSO mode it is always 0x00 (so it can serve as the string terminator as well as a flag) - // This allows strings up up to 12 (11 + \0 termination) without any extra space. - enum { SSOSIZE = sizeof(struct _ptr) + 4 }; // Characters to allocate space for SSO, must be 12 or more + // This allows strings up up to 11 (10 + \0 termination) without any extra space. + enum { SSOSIZE = sizeof(struct _ptr) + 4 - 1 }; // Characters to allocate space for SSO, must be 12 or more + struct _sso { + char buff[SSOSIZE]; + unsigned len : 7; + unsigned flag : 1; + }; enum { CAPACITY_MAX = 65535 }; // If size of capacity changed, be sure to update this enum union { struct _ptr ptr; - char sso_buf[SSOSIZE]; + struct _sso sso; }; // Accessor functions - inline bool sso() const { return sso_buf[SSOSIZE - 1] == 0; } - inline unsigned int len() const { return sso() ? strlen(sso_buf) : ptr.len; } - inline unsigned int capacity() const { return sso() ? SSOSIZE - 1 : ptr.cap; } - inline void setSSO(bool sso) { sso_buf[SSOSIZE - 1] = sso ? 0x00 : 0xff; } - inline void setLen(int len) { if (!sso()) ptr.len = len; } - inline void setCapacity(int cap) { if (!sso()) ptr.cap = cap; } - inline void setBuffer(char *buff) { if (!sso()) ptr.buff = buff; } + inline bool isSSO() const { return sso.flag; } + inline unsigned int len() const { return isSSO() ? sso.len : ptr.len; } + inline unsigned int capacity() const { return isSSO() ? (unsigned int)SSOSIZE - 1 : ptr.cap; } + inline void setSSO(bool set) { sso.flag = set; } + inline void setLen(int len) { if (!isSSO()) ptr.len = len; else sso.len = len; } + inline void setCapacity(int cap) { if (!isSSO()) ptr.cap = cap; } + inline void setBuffer(char *buff) { if (!isSSO()) ptr.buff = buff; } // Buffer accessor functions - inline const char *buffer() const { return (const char *)(sso() ? sso_buf : ptr.buff); } - inline char *wbuffer() const { return sso() ? const_cast(sso_buf) : ptr.buff; } // Writable version of buffer + inline const char *buffer() const { return (const char *)(isSSO() ? sso.buff : ptr.buff); } + inline char *wbuffer() const { return isSSO() ? const_cast(sso.buff) : ptr.buff; } // Writable version of buffer protected: void init(void); diff --git a/tests/host/core/test_string.cpp b/tests/host/core/test_string.cpp index f5865b8e80..f4a74ceb6b 100644 --- a/tests/host/core/test_string.cpp +++ b/tests/host/core/test_string.cpp @@ -320,11 +320,11 @@ TEST_CASE("String SSO works", "[core][String]") REQUIRE(s.c_str() == savesso); REQUIRE(s == "0123456789"); REQUIRE(s.length() == 10); - s += "a"; - REQUIRE(s.c_str() == savesso); - REQUIRE(s == "0123456789a"); - REQUIRE(s.length() == 11); if (sizeof(savesso) == 4) { + s += "a"; + REQUIRE(s.c_str() != savesso); + REQUIRE(s == "0123456789a"); + REQUIRE(s.length() == 11); s += "b"; REQUIRE(s.c_str() != savesso); REQUIRE(s == "0123456789ab"); @@ -334,12 +334,16 @@ TEST_CASE("String SSO works", "[core][String]") REQUIRE(s == "0123456789abc"); REQUIRE(s.length() == 13); } else { + s += "a"; + REQUIRE(s.c_str() == savesso); + REQUIRE(s == "0123456789a"); + REQUIRE(s.length() == 11); s += "bcde"; REQUIRE(s.c_str() == savesso); REQUIRE(s == "0123456789abcde"); REQUIRE(s.length() == 15); s += "fghi"; - REQUIRE(s.c_str() == savesso); + REQUIRE(s.c_str() != savesso); REQUIRE(s == "0123456789abcdefghi"); REQUIRE(s.length() == 19); s += "j"; @@ -360,7 +364,7 @@ TEST_CASE("String SSO works", "[core][String]") REQUIRE(s.length() == 23); s += "nopq"; REQUIRE(s.c_str() != savesso); - REQUIRE(s == "0123456789abcdefghijklmnopq"); + REQUIRE(s == "0123456789abcdefghijklmnopq"); REQUIRE(s.length() == 27); s += "rstu"; REQUIRE(s.c_str() != savesso); From df81cbe1b43fc7b62e2e5b4588044f9e17200676 Mon Sep 17 00:00:00 2001 From: "Earle F. Philhower, III" Date: Mon, 27 May 2019 15:09:15 -0700 Subject: [PATCH 2/6] Use memmove instead of strlen, add \0 in String test Use memmove/memcpy_P when we know the length of a string to save CPU time. Add tests to inject \0s in a String to ensure it is still working as designed. --- cores/esp8266/WString.cpp | 14 ++++++------- tests/host/core/test_string.cpp | 35 +++++++++++++++++++++++++++++++++ 2 files changed, 42 insertions(+), 7 deletions(-) diff --git a/cores/esp8266/WString.cpp b/cores/esp8266/WString.cpp index e70bc2e5ee..0642ae8650 100644 --- a/cores/esp8266/WString.cpp +++ b/cores/esp8266/WString.cpp @@ -183,7 +183,7 @@ unsigned char String::changeBuffer(unsigned int maxStrLen) { size_t oldSize = capacity() + 1; // include NULL. if (isSSO()) { // Copy the SSO buffer into allocated space - memcpy(newbuffer, sso.buff, sizeof(sso.buff)); + memmove(newbuffer, sso.buff, sizeof(sso.buff)); } if (newSize > oldSize) { @@ -208,7 +208,7 @@ String & String::copy(const char *cstr, unsigned int length) { return *this; } setLen(length); - strcpy(wbuffer(), cstr); + memmove(wbuffer(), cstr, length + 1); return *this; } @@ -218,7 +218,7 @@ String & String::copy(const __FlashStringHelper *pstr, unsigned int length) { return *this; } setLen(length); - strcpy_P(wbuffer(), (PGM_P)pstr); + memcpy_P(wbuffer(), (PGM_P)pstr, length + 1); // We know wbuffer() cannot ever be in PROGMEM, so memcpy safe here return *this; } @@ -226,7 +226,7 @@ String & String::copy(const __FlashStringHelper *pstr, unsigned int length) { void String::move(String &rhs) { if(buffer()) { if(capacity() >= rhs.len()) { - strcpy(wbuffer(), rhs.buffer()); + memmove(wbuffer(), rhs.buffer(), rhs.length() + 1); setLen(rhs.len()); rhs.invalidate(); return; @@ -311,7 +311,7 @@ unsigned char String::concat(const String &s) { return 1; if (!reserve(newlen)) return 0; - memcpy(wbuffer() + len(), buffer(), len()); + memmove(wbuffer() + len(), buffer(), len()); setLen(newlen); wbuffer()[len()] = 0; return 1; @@ -328,7 +328,7 @@ unsigned char String::concat(const char *cstr, unsigned int length) { return 1; if(!reserve(newlen)) return 0; - strcpy(wbuffer() + len(), cstr); + memmove(wbuffer() + len(), cstr, length + 1); setLen(newlen); return 1; } @@ -394,7 +394,7 @@ unsigned char String::concat(const __FlashStringHelper * str) { if (length == 0) return 1; unsigned int newlen = len() + length; if (!reserve(newlen)) return 0; - strcpy_P(wbuffer() + len(), (PGM_P)str); + memcpy_P(wbuffer() + len(), (PGM_P)str, length + 1); setLen(newlen); return 1; } diff --git a/tests/host/core/test_string.cpp b/tests/host/core/test_string.cpp index f4a74ceb6b..35fc4f8273 100644 --- a/tests/host/core/test_string.cpp +++ b/tests/host/core/test_string.cpp @@ -456,3 +456,38 @@ TEST_CASE("Issue #2736 - StreamString SSO fix", "[core][StreamString]") s.print('\"'); REQUIRE(s == "{\"message\""); } + +TEST_CASE("Strings with NULs", "[core][String]") +{ + // The following should never be done in a real app! This is only to inject 0s in the middle of a string. + // Fits in SSO... + String str("01234567"); + REQUIRE(str.length() == 8); + char *ptr = (char *)str.c_str(); + ptr[3] = 0; + String str2; + str2 = str; + REQUIRE(str2.length() == 8); + // Needs a buffer pointer + str = "0123456789012345678901234567890123456789"; + ptr = (char *)str.c_str(); + ptr[3] = 0; + str2 = str; + REQUIRE(str2.length() == 40); + String str3("a"); + ptr = (char *)str3.c_str(); + *ptr = 0; + REQUIRE(str3.length() == 1); + str3 += str3; + REQUIRE(str3.length() == 2); + str3 += str3; + REQUIRE(str3.length() == 4); + str3 += str3; + REQUIRE(str3.length() == 8); + str3 += str3; + REQUIRE(str3.length() == 16); + str3 += str3; + REQUIRE(str3.length() == 32); + str3 += str3; + REQUIRE(str3.length() == 64); +} From 32ba7049ba65734f97f33ca992b00ee3b81a6476 Mon Sep 17 00:00:00 2001 From: "Earle F. Philhower, III" Date: Mon, 27 May 2019 15:59:12 -0700 Subject: [PATCH 3/6] Check that the 0-string is really 0s in test --- tests/host/core/test_string.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/host/core/test_string.cpp b/tests/host/core/test_string.cpp index 35fc4f8273..a6488ee6e6 100644 --- a/tests/host/core/test_string.cpp +++ b/tests/host/core/test_string.cpp @@ -490,4 +490,7 @@ TEST_CASE("Strings with NULs", "[core][String]") REQUIRE(str3.length() == 32); str3 += str3; REQUIRE(str3.length() == 64); + static char zeros[64] = {0}; + const char *p = str3.c_str(); + REQUIRE(!memcmp(p, zeros, 64)); } From 4705dadf332eb07221bbe2ffd115db6287c3975f Mon Sep 17 00:00:00 2001 From: "Earle F. Philhower, III" Date: Mon, 27 May 2019 16:07:32 -0700 Subject: [PATCH 4/6] Fix valgrind error, cleanup code --- cores/esp8266/WString.cpp | 4 +++- cores/esp8266/WString.h | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/cores/esp8266/WString.cpp b/cores/esp8266/WString.cpp index 0642ae8650..02307eeacb 100644 --- a/cores/esp8266/WString.cpp +++ b/cores/esp8266/WString.cpp @@ -31,7 +31,7 @@ String::String(const char *cstr) { init(); - if(cstr) + if (cstr) copy(cstr, strlen(cstr)); } @@ -157,7 +157,9 @@ unsigned char String::changeBuffer(unsigned int maxStrLen) { if (maxStrLen < sizeof(sso.buff) - 1) { if (isSSO() || !buffer()) { // Already using SSO, nothing to do + uint16_t oldLen = len(); setSSO(true); + setLen(oldLen); return 1; } else { // if bufptr && !isSSO() // Using bufptr, need to shrink into sso.buff diff --git a/cores/esp8266/WString.h b/cores/esp8266/WString.h index 4039efb400..fcd5853b51 100644 --- a/cores/esp8266/WString.h +++ b/cores/esp8266/WString.h @@ -267,7 +267,7 @@ class String { inline unsigned int len() const { return isSSO() ? sso.len : ptr.len; } inline unsigned int capacity() const { return isSSO() ? (unsigned int)SSOSIZE - 1 : ptr.cap; } inline void setSSO(bool set) { sso.flag = set; } - inline void setLen(int len) { if (!isSSO()) ptr.len = len; else sso.len = len; } + inline void setLen(int len) { if (isSSO()) sso.len = len; else ptr.len = len; } inline void setCapacity(int cap) { if (!isSSO()) ptr.cap = cap; } inline void setBuffer(char *buff) { if (!isSSO()) ptr.buff = buff; } // Buffer accessor functions From db24311fbfac87cc18c22e295272633465d7ec98 Mon Sep 17 00:00:00 2001 From: "Earle F. Philhower, III" Date: Tue, 28 May 2019 07:23:55 -0700 Subject: [PATCH 5/6] Update comments/var names per review --- cores/esp8266/WString.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/cores/esp8266/WString.h b/cores/esp8266/WString.h index fcd5853b51..ea077bcd74 100644 --- a/cores/esp8266/WString.h +++ b/cores/esp8266/WString.h @@ -254,19 +254,19 @@ class String { enum { SSOSIZE = sizeof(struct _ptr) + 4 - 1 }; // Characters to allocate space for SSO, must be 12 or more struct _sso { char buff[SSOSIZE]; - unsigned len : 7; - unsigned flag : 1; + unsigned len : 7; + unsigned isSSO : 1; }; - enum { CAPACITY_MAX = 65535 }; // If size of capacity changed, be sure to update this enum + enum { CAPACITY_MAX = 65535 }; // If typeof(cap) changed from uint16_t, be sure to update this enum to the max value storable in the type union { struct _ptr ptr; struct _sso sso; }; // Accessor functions - inline bool isSSO() const { return sso.flag; } + inline bool isSSO() const { return sso.isSSO; } inline unsigned int len() const { return isSSO() ? sso.len : ptr.len; } - inline unsigned int capacity() const { return isSSO() ? (unsigned int)SSOSIZE - 1 : ptr.cap; } - inline void setSSO(bool set) { sso.flag = set; } + inline unsigned int capacity() const { return isSSO() ? (unsigned int)SSOSIZE - 1 : ptr.cap; } // Size of max string not including terminal NUL + inline void setSSO(bool set) { sso.isSSO = set; } inline void setLen(int len) { if (isSSO()) sso.len = len; else ptr.len = len; } inline void setCapacity(int cap) { if (!isSSO()) ptr.cap = cap; } inline void setBuffer(char *buff) { if (!isSSO()) ptr.buff = buff; } From 7d5343a6ac127d2ae675761bb507229bc0ed100d Mon Sep 17 00:00:00 2001 From: "Earle F. Philhower, III" Date: Wed, 5 Jun 2019 07:23:55 -0700 Subject: [PATCH 6/6] Add comments and explicit packing request for future reference --- cores/esp8266/WString.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cores/esp8266/WString.h b/cores/esp8266/WString.h index ea077bcd74..48b96f08f8 100644 --- a/cores/esp8266/WString.h +++ b/cores/esp8266/WString.h @@ -254,9 +254,9 @@ class String { enum { SSOSIZE = sizeof(struct _ptr) + 4 - 1 }; // Characters to allocate space for SSO, must be 12 or more struct _sso { char buff[SSOSIZE]; - unsigned len : 7; - unsigned isSSO : 1; - }; + unsigned char len : 7; // Ensure only one byte is allocated by GCC for the bitfields + unsigned char isSSO : 1; + } __attribute__((packed)); // Ensure that GCC doesn't expand the flag byte to a 32-bit word for alignment issues enum { CAPACITY_MAX = 65535 }; // If typeof(cap) changed from uint16_t, be sure to update this enum to the max value storable in the type union { struct _ptr ptr;