Skip to content

Refactor string attributes and add constant string implementations #2524

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits into from
Feb 18, 2024
36 changes: 0 additions & 36 deletions integration_tests/test_str_01.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,23 +37,6 @@ def test_str_slice():
# TODO:
# assert a[0:5:-1] == ""

def test_str_isalpha():
# Checks str.isalpha() when it is called on typed string variables.
# Only "helloworld" consists purely of letters; the other inputs contain
# a space, digits/punctuation, only a space, or nothing at all.
a: str = "helloworld"
b: str = "hj kl"
c: str = "a12(){}A"
d: str = " "
e: str = ""
res: bool = a.isalpha()
res2: bool = b.isalpha()
res3: bool = c.isalpha()
res4: bool = d.isalpha()
res5: bool = e.isalpha()
assert res == True
assert res2 == False
assert res3 == False
assert res4 == False
# An empty string has no characters, so isalpha() is expected to be False.
assert res5 == False


def test_str_title():
a: str = "hello world"
Expand All @@ -69,23 +52,6 @@ def test_str_title():
assert res3 == "Hello World"
assert res4 == "{Hel1O}World"

def test_str_istitle():
# Checks str.istitle() when it is called on typed string variables.
a: str = "Hello World"
b: str = "Hj'kl"
c: str = "hELlo wOrlD"
d: str = " Hello"
e: str = " "
res: bool = a.istitle()
res2: bool = b.istitle()
res3: bool = c.istitle()
res4: bool = d.istitle()
res5: bool = e.istitle()
assert res == True
# "Hj'kl": the lowercase 'k' directly follows a non-letter, so it is not title case.
assert res2 == False
assert res3 == False
# A leading space before a title-cased word is still expected to be accepted.
assert res4 == True
# A string without any letter is expected to be rejected.
assert res5 == False

def test_str_repeat():
a: str
a = "Xyz"
Expand Down Expand Up @@ -160,8 +126,6 @@ def check():
test_str_join_empty_list()
test_constant_str_subscript()
test_str_title()
test_str_istitle()
test_str_isalpha()
test_str_split()

check()
50 changes: 50 additions & 0 deletions integration_tests/test_str_attributes.py
Original file line number Diff line number Diff line change
Expand Up @@ -306,6 +306,53 @@ def is_ascii():
assert s.isascii() == True


def is_alpha():
# Checks str.isalpha() in two forms: first on typed string variables,
# then on string literals directly.
# NOTE(review): the literal form is presumably evaluated at compile time by
# the constant-string branch in python_ast_to_asr.cpp - confirm.
a: str = "helloworld"
b: str = "hj kl"
c: str = "a12(){}A"
d: str = " "
e: str = ""
res: bool = a.isalpha()
res2: bool = b.isalpha()
res3: bool = c.isalpha()
res4: bool = d.isalpha()
res5: bool = e.isalpha()
assert res == True
assert res2 == False
assert res3 == False
assert res4 == False
# An empty string has no characters, so isalpha() is expected to be False.
assert res5 == False

# Same inputs and expected results as above, called on literals.
assert "helloworld".isalpha() == True
assert "hj kl".isalpha() == False
assert "a12(){}A".isalpha() == False
assert " ".isalpha() == False
assert "".isalpha() == False


def is_title():
# Checks str.istitle() in two forms: first on typed string variables,
# then on string literals directly.
# NOTE(review): the literal form is presumably evaluated at compile time by
# the constant-string branch in python_ast_to_asr.cpp - confirm.
a: str = "Hello World"
b: str = "Hj'kl"
c: str = "hELlo wOrlD"
d: str = " Hello"
e: str = " "
res: bool = a.istitle()
res2: bool = b.istitle()
res3: bool = c.istitle()
res4: bool = d.istitle()
res5: bool = e.istitle()
assert res == True
# "Hj'kl": the lowercase 'k' directly follows a non-letter, so it is not title case.
assert res2 == False
assert res3 == False
# A leading space before a title-cased word is still expected to be accepted.
assert res4 == True
# A string without any letter is expected to be rejected.
assert res5 == False

# Same inputs and expected results as above, called on literals.
assert "Hello World".istitle() == True
assert "Hj'kl".istitle() == False
assert "hELlo wOrlD".istitle() == False
assert " Hello".istitle() == True
assert " ".istitle() == False

def is_space():
assert "\n".isspace() == True
assert " ".isspace() == True
Expand All @@ -320,6 +367,7 @@ def is_space():
assert s.isspace() == False



def check():
capitalize()
lower()
Expand All @@ -335,6 +383,8 @@ def check():
is_upper()
is_decimal()
is_ascii()
is_alpha()
is_title()
is_space()


Expand Down
75 changes: 53 additions & 22 deletions src/lpython/semantics/python_ast_to_asr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6577,26 +6577,6 @@ class BodyVisitor : public CommonVisitor<BodyVisitor> {
arg.loc = loc;
arg.m_value = s_var;
fn_args.push_back(al, arg);
} else if (attr_name == "isalpha") {
if (args.size() != 0) {
throw SemanticError("str.isalpha() takes no arguments",
loc);
}
fn_call_name = "_lpython_str_isalpha";
ASR::call_arg_t arg;
arg.loc = loc;
arg.m_value = s_var;
fn_args.push_back(al, arg);
} else if (attr_name == "istitle") {
if (args.size() != 0) {
throw SemanticError("str.istitle() takes no arguments",
loc);
}
fn_call_name = "_lpython_str_istitle";
ASR::call_arg_t arg;
arg.loc = loc;
arg.m_value = s_var;
fn_args.push_back(al, arg);
} else if (attr_name == "title") {
if (args.size() != 0) {
throw SemanticError("str.title() takes no arguments",
Expand Down Expand Up @@ -6813,7 +6793,7 @@ class BodyVisitor : public CommonVisitor<BodyVisitor> {
/*
String Validation Methods i.e all "is" based functions are handled here
*/
std::vector<std::string> validation_methods{"lower", "upper", "decimal", "ascii", "space"}; // Database of validation methods supported
std::vector<std::string> validation_methods{"lower", "upper", "decimal", "ascii", "space", "alpha", "title"}; // Database of validation methods supported
std::string method_name = attr_name.substr(2);

if(std::find(validation_methods.begin(),validation_methods.end(), method_name) == validation_methods.end()) {
Expand Down Expand Up @@ -7116,7 +7096,7 @@ class BodyVisitor : public CommonVisitor<BodyVisitor> {
* islower() method is limited to English Alphabets currently
* TODO: We can support other characters from Unicode Library
*/
std::vector<std::string> validation_methods{"lower", "upper", "decimal", "ascii", "space"}; // Database of validation methods supported
std::vector<std::string> validation_methods{"lower", "upper", "decimal", "ascii", "space", "alpha", "title"}; // Database of validation methods supported
std::string method_name = attr_name.substr(2);
if(std::find(validation_methods.begin(),validation_methods.end(), method_name) == validation_methods.end()) {
throw SemanticError("String method not implemented: " + attr_name, loc);
Expand Down Expand Up @@ -7214,6 +7194,57 @@ we will have to use something else.
tmp = ASR::make_LogicalConstant_t(al, loc, is_space,
ASRUtils::TYPE(ASR::make_Logical_t(al, loc, 4)));
return;
} else if (attr_name == "isalpha") {
/*
* Specification -
Return True if every character in the string is alphabetic
and the string contains at least one character.
* Only the ASCII letters 'A'-'Z' and 'a'-'z' are treated as alphabetic here.
*/
bool is_alpha = (s_var.size() != 0);
for (auto &i : s_var) {
if (!((i >= 'A' && i <= 'Z') || (i >= 'a' && i <= 'z'))) {
is_alpha = false;
break;
}
}
tmp = ASR::make_LogicalConstant_t(al, loc, is_alpha,
ASRUtils::TYPE(ASR::make_Logical_t(al, loc, 4)));
return;
} else if (attr_name == "istitle") {
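/*
* Specification -
Return True if every word in the string is in title case, i.e. an uppercase
letter only appears at the start of a word and a lowercase letter only
appears after another letter, and the string contains at least one letter.
* Only the ASCII letters 'A'-'Z' and 'a'-'z' are treated as letters here;
any other character ends the current word.
*/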
bool is_title = (s_var.size() != 0);

bool in_word = false; // Represents if we are in a word or not
bool is_alpha_present = false;
for (auto &i : s_var) {
if (i >= 'A' && i <= 'Z') {
is_alpha_present = true;
if (in_word) {
// We have come across an uppercase character in the middle of a word
is_title = false;
break;
} else {
in_word = true;
}
} else if (i >= 'a' && i <= 'z') {
is_alpha_present = true;
if (!in_word) {
// We have come across a lowercase character at the start of a word
is_title = false;
break;
}
} else {
in_word = false;
}
}
is_title = is_title && is_alpha_present;
tmp = ASR::make_LogicalConstant_t(al, loc, is_title,
ASRUtils::TYPE(ASR::make_Logical_t(al, loc, 4)));
return;
} else {
throw SemanticError("'str' object has no attribute '" + attr_name + "'", loc);
}
Expand Down
4 changes: 1 addition & 3 deletions src/runtime/lpython_builtin.py
Original file line number Diff line number Diff line change
Expand Up @@ -764,9 +764,7 @@ def _lpython_str_istitle(s: str) -> bool:
ch: str
only_whitespace: bool = True
for ch in s:
if (ch == ' ' or ch == '\t' or ch == '\n') and word_start:
continue # Found a space character at the start of a word
elif ch.isalpha() and (ord('A') <= ord(ch) and ord(ch) <= ord('Z')):
if ch.isalpha() and (ord('A') <= ord(ch) and ord(ch) <= ord('Z')):
only_whitespace = False
if word_start:
word_start = False
Expand Down