Skip to content

Commit 8fcab5e

Browse files
committed
Fixed wrong form action extraction and MissingSchema errors
1 parent a9625e4 commit 8fcab5e

File tree

1 file changed

+39
-18
lines changed

1 file changed

+39
-18
lines changed

api_app/analyzers_manager/file_analyzers/phishing/phishing_form_compiler.py

Lines changed: 39 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,13 @@
11
import logging
22
from datetime import date, timedelta
33
from typing import Dict
4-
from urllib.parse import urlparse
54

65
import requests
76
from faker import Faker # skipcq: BAN-B410
87
from lxml.etree import HTMLParser # skipcq: BAN-B410
98
from lxml.html import document_fromstring
109
from requests import HTTPError, Response
10+
from requests.exceptions import MissingSchema
1111

1212
from api_app.analyzers_manager.classes import FileAnalyzer
1313
from api_app.models import PythonConfig
@@ -138,25 +138,33 @@ def identify_text_input(self, input_name: str) -> str:
138138
return fake_value
139139

140140
def extract_action_attribute(self, form) -> str:
141-
if not (form_action := form.get("action", None)):
141+
form_action: str = form.get("action", None)
142+
if not form_action:
142143
logger.info(
143144
f"'action' attribute not found in form. Defaulting to {self.target_site=}"
144145
)
145146
form_action = self.target_site
146-
147-
# if relative url extracted, clean it from '/' and concatenate everything
148-
# if action was not extracted in previous step the if should not pass as it is a url
149-
if not urlparse(form_action).netloc:
147+
elif form_action.startswith("/"): # pure relative url
150148
logger.info(f"Found relative url in {form_action=}")
149+
form_action = form_action.replace("/", "", 1)
151150
base_site = self.target_site
151+
152152
if base_site.endswith("/"):
153153
base_site = base_site[:-1]
154-
if form_action.startswith("/"):
155-
form_action = form_action.replace("/", "", 1)
154+
form_action = base_site + "/" + form_action
155+
elif (
156+
"." in form_action and "://" not in form_action
157+
): # found a domain (relative file names such as "login.php" should start with /)
158+
logger.info(f"Found a domain in form action {form_action=}")
159+
else:
160+
base_site = self.target_site
156161

162+
if base_site.endswith("/"):
163+
base_site = base_site[:-1]
157164
form_action = base_site + "/" + form_action
158165

159166
logger.info(f"Extracted action to post data to: {form_action}")
167+
160168
return form_action
161169

162170
def compile_form_field(self, form) -> dict:
@@ -200,16 +208,29 @@ def perform_request_to_form(self, form) -> Response:
200208
headers = {
201209
"User-Agent": self.user_agent,
202210
}
203-
response = requests.post(
204-
url=dest_url,
205-
data=params,
206-
headers=headers,
207-
proxies=(
208-
{"http": self.proxy_address, "https": self.proxy_address}
209-
if self.proxy_address
210-
else None
211-
),
212-
)
211+
try:
212+
response = requests.post(
213+
url=dest_url,
214+
data=params,
215+
headers=headers,
216+
proxies=(
217+
{"http": self.proxy_address, "https": self.proxy_address}
218+
if self.proxy_address
219+
else None
220+
),
221+
)
222+
except MissingSchema:
223+
logger.info(f"Adding default 'https://' schema to {dest_url}")
224+
response = requests.post(
225+
url="https://" + dest_url,
226+
data=params,
227+
headers=headers,
228+
proxies=(
229+
{"http": self.proxy_address, "https": self.proxy_address}
230+
if self.proxy_address
231+
else None
232+
),
233+
)
213234
logger.info(f"Request headers: {response.request.headers}")
214235
return response
215236

0 commit comments

Comments
 (0)