diff --git a/Makefile b/Makefile index 71f8600..b716cbe 100644 --- a/Makefile +++ b/Makefile @@ -9,7 +9,7 @@ install: .PHONY: lint lint: #python setup.py check -rms - flake8 --ignore=E501,E126,W503 email_validator tests + flake8 --ignore=E501,E126,E121,E125,W503 email_validator tests .PHONY: test test: diff --git a/README.md b/README.md index 6a613bc..400dc96 100644 --- a/README.md +++ b/README.md @@ -148,6 +148,11 @@ The `validate_email` function also accepts the following keyword arguments `test_environment=False`: DNS-based deliverability checks are disabled and `test` and `subdomain.test` domain names are permitted (see below). +`allow_special_domains=False`: Set to `True` to skip the `EmailUndeliverableError` that is normally raised for special-use or reserved domain names, such as "arpa", "local" and others. The default is `False` (restricted). + +`allow_any_top_level_domain=False`: Set to `True` to skip the `EmailUndeliverableError` that is normally raised for domain names that do not match the regex `[A-Za-z]\Z` (that is, do not end with a letter), such as "org123". This may be useful for local services in isolated environments that use a special local TLD. The default is `False` (restricted). + +`allowed_top_level_domains=[]`: Similar to `allow_any_top_level_domain`, but works as a whitelist of top-level domains. It is ignored if `allow_any_top_level_domain=True` or if the list is empty. When the list is not empty, every domain whose top-level domain is not in the list is rejected, including ordinary ones such as "com". The default is `[]` (no *bad* top-level domains are allowed). ### DNS timeout and cache When validating many email addresses or to control the timeout (the default is 15 seconds), create a caching [dns.resolver.Resolver](https://dnspython.readthedocs.io/en/latest/resolver-class.html) to reuse in each call. 
The `caching_resolver` function returns one easily for you: diff --git a/email_validator/__init__.py b/email_validator/__init__.py index 3d295ec..59b80fb 100644 --- a/email_validator/__init__.py +++ b/email_validator/__init__.py @@ -13,7 +13,10 @@ CHECK_DELIVERABILITY = True TEST_ENVIRONMENT = False DEFAULT_TIMEOUT = 15 # secs - +# Soft validation options +ALLOW_SPECIAL_DOMAINS = False +ALLOW_ANY_TOP_LEVEL_DOMAIN = False +ALLOWED_TOP_LEVEL_DOMAINS = [] # type: ignore # Based on RFC 2822 section 3.2.4 / RFC 5322 section 3.2.3, these # characters are permitted in email addresses (not taking into # account internationalization): @@ -265,7 +268,10 @@ def validate_email( check_deliverability=CHECK_DELIVERABILITY, test_environment=TEST_ENVIRONMENT, timeout=DEFAULT_TIMEOUT, - dns_resolver=None + dns_resolver=None, + allow_special_domains=ALLOW_SPECIAL_DOMAINS, + allow_any_top_level_domain=ALLOW_ANY_TOP_LEVEL_DOMAIN, + allowed_top_level_domains=ALLOWED_TOP_LEVEL_DOMAINS # type: ignore ): """ Validates an email address, raising an EmailNotValidError if the address is not valid or returning a dict of @@ -300,7 +306,13 @@ def validate_email( ret.smtputf8 = local_part_info["smtputf8"] # Validate the email address's domain part syntax and get a normalized form. 
- domain_part_info = validate_email_domain_part(parts[1], test_environment=test_environment) + domain_part_info = validate_email_domain_part( + parts[1], + test_environment=test_environment, + allow_special_domains=allow_special_domains, + allow_any_top_level_domain=allow_any_top_level_domain, + allowed_top_level_domains=allowed_top_level_domains + ) ret.domain = domain_part_info["domain"] ret.ascii_domain = domain_part_info["ascii_domain"] @@ -460,7 +472,13 @@ def validate_email_local_part(local, allow_smtputf8=True, allow_empty_local=Fals } -def validate_email_domain_part(domain, test_environment=False): +def validate_email_domain_part( + domain, + test_environment=False, + allow_special_domains=ALLOW_SPECIAL_DOMAINS, + allow_any_top_level_domain=ALLOW_ANY_TOP_LEVEL_DOMAIN, + allowed_top_level_domains=ALLOWED_TOP_LEVEL_DOMAINS # type: ignore + ): # Empty? if len(domain) == 0: raise EmailSyntaxError("There must be something after the @-sign.") @@ -550,20 +568,31 @@ def validate_email_domain_part(domain, test_environment=False): # deliverability errors since they are syntactically valid. # Some might fail DNS-based deliverability checks, but that # can be turned off, so we should fail them all sooner. - for d in SPECIAL_USE_DOMAIN_NAMES: - # See the note near the definition of SPECIAL_USE_DOMAIN_NAMES. - if d == "test" and test_environment: - continue + if not allow_special_domains: + for d in SPECIAL_USE_DOMAIN_NAMES: + # See the note near the definition of SPECIAL_USE_DOMAIN_NAMES. + if d == "test" and test_environment: + continue - if ascii_domain == d or ascii_domain.endswith("." + d): - raise EmailUndeliverableError("The domain name %s is a special-use or reserved name that cannot be used with email." % domain_i18n) + if ascii_domain == d or ascii_domain.endswith("." + d): + raise EmailUndeliverableError("The domain name %s is a special-use or reserved name that cannot be used with email." 
% domain_i18n) # We also know that all TLDs currently end with a letter, and # we'll consider that a non-DNS based deliverability check. - if not re.search(r"[A-Za-z]\Z", ascii_domain): - raise EmailUndeliverableError( - "The domain name %s is not valid. It is not within a valid top-level domain." % domain_i18n - ) + if not allow_any_top_level_domain: + # We check len() not to slow validating if tld were not allowed + if len(allowed_top_level_domains) > 0: + # We already trust the period of domain name and avoiding IndexError + tld = ascii_domain.split('.')[-1] + if tld not in allowed_top_level_domains: + raise EmailUndeliverableError( + "The domain name %s is not valid. Top-level domain name is not included in allowed_top_level_domains." % domain_i18n + ) + else: + if not re.search(r"[A-Za-z]\Z", ascii_domain): + raise EmailUndeliverableError( + "The domain name %s is not valid. It is not within a valid top-level domain." % domain_i18n + ) # Return the IDNA ASCII-encoded form of the domain, which is how it # would be transmitted on the wire (except when used with SMTPUTF8 diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..7664849 --- /dev/null +++ b/tests/__init__.py @@ -0,0 +1 @@ +# Just for a right import from email_validator to the test_main.py on modern Python versions