From 3d527548106c17d6c8c358a45722697691e8c472 Mon Sep 17 00:00:00 2001 From: advik Date: Sun, 11 Feb 2024 16:00:45 +0530 Subject: [PATCH 1/8] Refactoring string attributes to reduce redundant code --- src/lpython/semantics/python_ast_to_asr.cpp | 22 +-------------------- 1 file changed, 1 insertion(+), 21 deletions(-) diff --git a/src/lpython/semantics/python_ast_to_asr.cpp b/src/lpython/semantics/python_ast_to_asr.cpp index 8487a4ac54..02e2027db2 100644 --- a/src/lpython/semantics/python_ast_to_asr.cpp +++ b/src/lpython/semantics/python_ast_to_asr.cpp @@ -6568,26 +6568,6 @@ class BodyVisitor : public CommonVisitor { arg.loc = loc; arg.m_value = s_var; fn_args.push_back(al, arg); - } else if (attr_name == "isalpha") { - if (args.size() != 0) { - throw SemanticError("str.isalpha() takes no arguments", - loc); - } - fn_call_name = "_lpython_str_isalpha"; - ASR::call_arg_t arg; - arg.loc = loc; - arg.m_value = s_var; - fn_args.push_back(al, arg); - } else if (attr_name == "istitle") { - if (args.size() != 0) { - throw SemanticError("str.istitle() takes no arguments", - loc); - } - fn_call_name = "_lpython_str_istitle"; - ASR::call_arg_t arg; - arg.loc = loc; - arg.m_value = s_var; - fn_args.push_back(al, arg); } else if (attr_name == "title") { if (args.size() != 0) { throw SemanticError("str.title() takes no arguments", @@ -6804,7 +6784,7 @@ class BodyVisitor : public CommonVisitor { /* String Validation Methods i.e all "is" based functions are handled here */ - std::vector validation_methods{"lower", "upper", "decimal", "ascii", "space"}; // Database of validation methods supported + std::vector validation_methods{"lower", "upper", "decimal", "ascii", "space", "alpha", "title"}; // Database of validation methods supported std::string method_name = attr_name.substr(2); if(std::find(validation_methods.begin(),validation_methods.end(), method_name) == validation_methods.end()) { From b880b0f2bf8418b0e2df3a20212037c7c97ac063 Mon Sep 17 00:00:00 2001 From: advik Date: Sun, 11 Feb 2024 17:13:23 +0530 Subject: [PATCH 2/8] Added constant string implementation of isalpha --- src/lpython/semantics/python_ast_to_asr.cpp | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/src/lpython/semantics/python_ast_to_asr.cpp b/src/lpython/semantics/python_ast_to_asr.cpp index 02e2027db2..0bb199de97 100644 --- a/src/lpython/semantics/python_ast_to_asr.cpp +++ b/src/lpython/semantics/python_ast_to_asr.cpp @@ -7052,7 +7052,7 @@ class BodyVisitor : public CommonVisitor { * islower() method is limited to English Alphabets currently * TODO: We can support other characters from Unicode Library */ - std::vector validation_methods{"lower", "upper", "decimal", "ascii", "space"}; // Database of validation methods supported + std::vector validation_methods{"lower", "upper", "decimal", "ascii", "space", "alpha"}; // Database of validation methods supported std::string method_name = attr_name.substr(2); if(std::find(validation_methods.begin(),validation_methods.end(), method_name) == validation_methods.end()) { throw SemanticError("String method not implemented: " + attr_name, loc); @@ -7150,6 +7150,22 @@ we will have to use something else. tmp = ASR::make_LogicalConstant_t(al, loc, is_space, ASRUtils::TYPE(ASR::make_Logical_t(al, loc, 4))); return; + } else if (attr_name == "isalpha") { + /* + * Specification - + Return True if all characters in the string are alphabets, + and there is at least one character in the string. + */ + bool is_alpha = (s_var.size() != 0); + for (auto &i : s_var) { + if ((i >= 'A' && i <= 'Z') || (i >= 'a' && i <= 'z')) { + is_alpha = false; + break; + } + } + tmp = ASR::make_LogicalConstant_t(al, loc, is_alpha, + ASRUtils::TYPE(ASR::make_Logical_t(al, loc, 4))); + return; } else { throw SemanticError("'str' object has no attribute '" + attr_name + "'", loc); } From baa71c4202ee7cc1b2a7a7caa1b504ecd1009a16 Mon Sep 17 00:00:00 2001 From: advik Date: Sun, 11 Feb 2024 17:38:51 +0530 Subject: [PATCH 3/8] Added constant string implementation of istitle --- src/lpython/semantics/python_ast_to_asr.cpp | 37 ++++++++++++++++++++- 1 file changed, 36 insertions(+), 1 deletion(-) diff --git a/src/lpython/semantics/python_ast_to_asr.cpp b/src/lpython/semantics/python_ast_to_asr.cpp index 0bb199de97..519adb1d2b 100644 --- a/src/lpython/semantics/python_ast_to_asr.cpp +++ b/src/lpython/semantics/python_ast_to_asr.cpp @@ -7052,7 +7052,7 @@ class BodyVisitor : public CommonVisitor { * islower() method is limited to English Alphabets currently * TODO: We can support other characters from Unicode Library */ - std::vector validation_methods{"lower", "upper", "decimal", "ascii", "space", "alpha"}; // Database of validation methods supported + std::vector validation_methods{"lower", "upper", "decimal", "ascii", "space", "alpha", "title"}; // Database of validation methods supported std::string method_name = attr_name.substr(2); if(std::find(validation_methods.begin(),validation_methods.end(), method_name) == validation_methods.end()) { throw SemanticError("String method not implemented: " + attr_name, loc); @@ -7166,6 +7166,41 @@ we will have to use something else. tmp = ASR::make_LogicalConstant_t(al, loc, is_alpha, ASRUtils::TYPE(ASR::make_Logical_t(al, loc, 4))); return; + } else if (attr_name == "istitle") { + /* + * Specification - + Returns True if all words in the string are in title case, + and there is at least one character in the string. + */ + bool is_title = (s_var.size() != 0); + + bool in_word = false; // Represents if we are in a word or not + bool is_alpha_present = false; + for (auto &i : s_var) { + if (i >= 'A' && i <= 'Z') { + is_alpha_present = true; + if (in_word) { + // We have come across an uppercase character in the middle of a word + is_title = false; + break; + } else { + in_word = true; + } + } else if (i >= 'a' && i <= 'z') { + is_alpha_present = true; + if (!in_word) { + //We have come across a lowercase character at the start of a word + is_title = false; + break; + } + } else { + in_word = false; + } + } + is_title = is_title && is_alpha_present; + tmp = ASR::make_LogicalConstant_t(al, loc, is_title, + ASRUtils::TYPE(ASR::make_Logical_t(al, loc, 4))); + return; } else { throw SemanticError("'str' object has no attribute '" + attr_name + "'", loc); } From deed28246f78e6b1e9a50ca7d22e724fc59be440 Mon Sep 17 00:00:00 2001 From: advik Date: Sun, 11 Feb 2024 17:48:04 +0530 Subject: [PATCH 4/8] Added isalpha tests for constant string implementation, and moved the tests to its appropriate location --- integration_tests/test_str_01.py | 18 ------ integration_tests/test_str_attributes.py | 72 ++++++++++++++++++------ 2 files changed, 54 insertions(+), 36 deletions(-) diff --git a/integration_tests/test_str_01.py b/integration_tests/test_str_01.py index be31827fac..8cf01b1172 100644 --- a/integration_tests/test_str_01.py +++ b/integration_tests/test_str_01.py @@ -37,23 +37,6 @@ def test_str_slice(): # TODO: # assert a[0:5:-1] == "" -def test_str_isalpha(): - a: str = "helloworld" - b: str = "hj kl" - c: str = "a12(){}A" - d: str = " " - e: str = "" - res: bool = a.isalpha() - res2: bool = b.isalpha() - res3: bool = c.isalpha() - res4: bool = d.isalpha() - res5: bool = e.isalpha() - assert res == True - assert res2 == False - assert res3 == False - assert res4 == False - assert res5 == False - def test_str_title(): a: str = "hello world" @@ -161,7 +144,6 @@ def check(): test_constant_str_subscript() test_str_title() test_str_istitle() - test_str_isalpha() test_str_split() check() diff --git a/integration_tests/test_str_attributes.py b/integration_tests/test_str_attributes.py index 2124e38026..c6e2c1a501 100755 --- a/integration_tests/test_str_attributes.py +++ b/integration_tests/test_str_attributes.py @@ -21,6 +21,7 @@ def lower(): assert "DDd12Vv" .lower() == "ddd12vv" assert "".lower() == "" + def upper(): s: str s = "AaaaAABBbbbbBB!@12223BN" @@ -28,6 +29,7 @@ def upper(): assert "DDd12Vv".upper() == "DDD12VV" assert "".upper() == "" + def strip(): s: str s = " AASAsaSas " @@ -88,14 +90,15 @@ def startswith(): assert s.startswith("sdd") == False assert "".startswith("ok") == False + def endswith(): # The following test suite fulfils the control flow graph coverage # in terms of Statement Coverage and Branch Coverage associated with endwith() functionality. - # Case 1: When string is constant and suffix is also constant + # Case 1: When string is constant and suffix is also constant assert "".endswith("") == True - assert "".endswith(" ") == False + assert "".endswith(" ") == False assert "".endswith("%") == False assert "".endswith("a1234PT#$") == False assert "".endswith("blah blah") == False @@ -105,13 +108,12 @@ def endswith(): assert " rendezvous 5:30 ".endswith("apple") == False assert "two plus".endswith("longer than string") == False - # Case 2: When string is constant and suffix is variable suffix: str suffix = "" assert "".endswith(suffix) == True suffix = " " - assert "".endswith(suffix) == False + assert "".endswith(suffix) == False suffix = "5:30 " assert " rendezvous 5:30 ".endswith(suffix) == True suffix = "" @@ -138,13 +140,14 @@ def endswith(): suffix = "apple" assert s.endswith(suffix) == False + def partition(): - - # Note: Both string or seperator cannot be empty - # Case 1: When string is constant and seperator is also constant - assert " ".partition(" ") == (""," "," ") - assert "apple mango".partition(" ") == ("apple"," ","mango") - assert "applemango".partition("afdnjkfsn") == ("applemango","","") + + # Note: Both string or seperator cannot be empty + # Case 1: When string is constant and seperator is also constant + assert " ".partition(" ") == ("", " ", " ") + assert "apple mango".partition(" ") == ("apple", " ", "mango") + assert "applemango".partition("afdnjkfsn") == ("applemango", "", "") assert "applemango".partition("an") == ("applem", "an", "go") assert "applemango".partition("mango") == ("apple", "mango", "") assert "applemango".partition("applemango") == ("", "applemango", "") @@ -154,15 +157,17 @@ def partition(): # Case 2: When string is constant and seperator is variable seperator: str seperator = " " - assert " ".partition(seperator) == (""," "," ") + assert " ".partition(seperator) == ("", " ", " ") seperator = " " - assert "apple mango".partition(seperator) == ("apple"," ","mango") + assert "apple mango".partition(seperator) == ("apple", " ", "mango") seperator = "5:30 " - assert " rendezvous 5:30 ".partition(seperator) == (" rendezvous ", "5:30 ", "") + assert " rendezvous 5:30 ".partition( + seperator) == (" rendezvous ", "5:30 ", "") seperator = "^&" assert "@#$%^&*()#!".partition(seperator) == ("@#$%", "^&", "*()#!") seperator = "daddada " - assert " rendezvous 5:30 ".partition(seperator) == (" rendezvous 5:30 ", "", "") + assert " rendezvous 5:30 ".partition( + seperator) == (" rendezvous 5:30 ", "", "") seperator = "longer than string" assert "two plus".partition(seperator) == ("two plus", "", "") @@ -182,6 +187,7 @@ def partition(): seperator = "apple" assert s.partition(seperator) == ("rendezvous 5", "", "") + def is_lower(): # Case 1: When constant string is present assert "".islower() == False @@ -204,8 +210,9 @@ def is_lower(): s = "apple is a fruit" assert s.islower() == True + def is_upper(): - # Case 1: When constant string is present + # Case 1: When constant string is present assert "".isupper() == False assert "apple".isupper() == False assert "4432632479".isupper() == False @@ -226,6 +233,7 @@ def is_upper(): s = "APPLE IS A FRUIT" assert s.isupper() == True + def is_decimal(): # Case 1: When constant string is present assert "".isdecimal() == False @@ -251,6 +259,7 @@ def is_decimal(): s = "12 34" assert s.isdecimal() == False + def is_ascii(): # Case 1: When constant string is present assert "".isascii() == True @@ -277,16 +286,41 @@ def is_ascii(): assert s.isascii() == True +def is_alpha(): + a: str = "helloworld" + b: str = "hj kl" + c: str = "a12(){}A" + d: str = " " + e: str = "" + res: bool = a.isalpha() + res2: bool = b.isalpha() + res3: bool = c.isalpha() + res4: bool = d.isalpha() + res5: bool = e.isalpha() + assert res == True + assert res2 == False + assert res3 == False + assert res4 == False + assert res5 == False + + assert "helloworld".isalpha() == True + assert "hj kl".isalpha() == False + assert "a12(){}A".isalpha() == False + assert " ".isalpha() == False + assert "".isalpha() == False + + def is_space(): assert "\n".isspace() == True assert " ".isspace() == True - assert "\r".isspace() == True + assert "\r".isspace() == True - s:str = " " - assert s.isspace() == True + s: str = " " + assert s.isspace() == True s = "a" assert s.isspace() == False + def check(): capitalize() lower() @@ -301,6 +335,8 @@ def check(): is_upper() is_decimal() is_ascii() + is_alpha() is_space() + check() From 66eb5837d52996209c0be07ca4fb6e843d86c8d9 Mon Sep 17 00:00:00 2001 From: advik Date: Sun, 11 Feb 2024 17:51:18 +0530 Subject: [PATCH 5/8] Added istitle tests for constant string implementation, and moved the tests to its appropriate location --- integration_tests/test_str_01.py | 17 ----------------- integration_tests/test_str_attributes.py | 23 +++++++++++++++++++++++ 2 files changed, 23 insertions(+), 17 deletions(-) diff --git a/integration_tests/test_str_01.py b/integration_tests/test_str_01.py index 8cf01b1172..08beaa7b25 100644 --- a/integration_tests/test_str_01.py +++ b/integration_tests/test_str_01.py @@ -52,23 +52,6 @@ def test_str_title(): assert res3 == "Hello World" assert res4 == "{Hel1O}World" -def test_str_istitle(): - a: str = "Hello World" - b: str = "Hj'kl" - c: str = "hELlo wOrlD" - d: str = " Hello" - e: str = " " - res: bool = a.istitle() - res2: bool = b.istitle() - res3: bool = c.istitle() - res4: bool = d.istitle() - res5: bool = e.istitle() - assert res == True - assert res2 == False - assert res3 == False - assert res4 == True - assert res5 == False - def test_str_repeat(): a: str a = "Xyz" diff --git a/integration_tests/test_str_attributes.py b/integration_tests/test_str_attributes.py index c6e2c1a501..b9c4d6cc46 100755 --- a/integration_tests/test_str_attributes.py +++ b/integration_tests/test_str_attributes.py @@ -310,6 +310,29 @@ def is_alpha(): assert "".isalpha() == False +def test_str_istitle(): + a: str = "Hello World" + b: str = "Hj'kl" + c: str = "hELlo wOrlD" + d: str = " Hello" + e: str = " " + res: bool = a.istitle() + res2: bool = b.istitle() + res3: bool = c.istitle() + res4: bool = d.istitle() + res5: bool = e.istitle() + assert res == True + assert res2 == False + assert res3 == False + assert res4 == True + assert res5 == False + + assert "Hello World".istitle() == True + assert "Hj'kl".istitle() == False + assert "hELlo wOrlD".istitle() == False + assert " Hello".istitle() == True + assert " ".istitle() == False + def is_space(): assert "\n".isspace() == True assert " ".isspace() == True From 35ffb95d009bde9facb649bce5a582a2ebe0fbf2 Mon Sep 17 00:00:00 2001 From: advik Date: Sun, 11 Feb 2024 18:21:43 +0530 Subject: [PATCH 6/8] Rename function to follow convention --- integration_tests/test_str_01.py | 1 - integration_tests/test_str_attributes.py | 3 ++- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/integration_tests/test_str_01.py b/integration_tests/test_str_01.py index 08beaa7b25..1d65fa9efc 100644 --- a/integration_tests/test_str_01.py +++ b/integration_tests/test_str_01.py @@ -126,7 +126,6 @@ def check(): test_str_join_empty_list() test_constant_str_subscript() test_str_title() - test_str_istitle() test_str_split() check() diff --git a/integration_tests/test_str_attributes.py b/integration_tests/test_str_attributes.py index b9c4d6cc46..537ce6c3fd 100755 --- a/integration_tests/test_str_attributes.py +++ b/integration_tests/test_str_attributes.py @@ -310,7 +310,7 @@ def is_alpha(): assert "".isalpha() == False -def test_str_istitle(): +def is_title(): a: str = "Hello World" b: str = "Hj'kl" c: str = "hELlo wOrlD" @@ -359,6 +359,7 @@ def check(): is_decimal() is_ascii() is_alpha() + is_title() is_space() From bebf5dcbca76d5d91825a0d9235bf4cb1ca20702 Mon Sep 17 00:00:00 2001 From: advik Date: Sun, 11 Feb 2024 19:01:29 +0530 Subject: [PATCH 7/8] Fixed isalpha bug --- src/lpython/semantics/python_ast_to_asr.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lpython/semantics/python_ast_to_asr.cpp b/src/lpython/semantics/python_ast_to_asr.cpp index 519adb1d2b..c9ceb2500f 100644 --- a/src/lpython/semantics/python_ast_to_asr.cpp +++ b/src/lpython/semantics/python_ast_to_asr.cpp @@ -7158,7 +7158,7 @@ we will have to use something else. */ bool is_alpha = (s_var.size() != 0); for (auto &i : s_var) { - if ((i >= 'A' && i <= 'Z') || (i >= 'a' && i <= 'z')) { + if (!((i >= 'A' && i <= 'Z') || (i >= 'a' && i <= 'z'))) { is_alpha = false; break; } From 90999bb573224cafa2da487b695327f40fc91372 Mon Sep 17 00:00:00 2001 From: advik Date: Sun, 11 Feb 2024 19:09:44 +0530 Subject: [PATCH 8/8] Removed redundant code in python istitle implementation --- src/runtime/lpython_builtin.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/runtime/lpython_builtin.py b/src/runtime/lpython_builtin.py index 1ac93fbfa9..145bf1e04e 100644 --- a/src/runtime/lpython_builtin.py +++ b/src/runtime/lpython_builtin.py @@ -731,9 +731,7 @@ def _lpython_str_istitle(s: str) -> bool: ch: str only_whitespace: bool = True for ch in s: - if (ch == ' ' or ch == '\t' or ch == '\n') and word_start: - continue # Found a space character at the start of a word - elif ch.isalpha() and (ord('A') <= ord(ch) and ord(ch) <= ord('Z')): + if ch.isalpha() and (ord('A') <= ord(ch) and ord(ch) <= ord('Z')): only_whitespace = False if word_start: word_start = False