Skip to content

Add Kodu to the list of clients with supported code patterns #1003

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Feb 11, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 24 additions & 20 deletions src/codegate/extract_snippets/body_extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
ClineCodeSnippetExtractor,
CodeSnippetExtractor,
DefaultCodeSnippetExtractor,
KoduCodeSnippetExtractor,
OpenInterpreterCodeSnippetExtractor,
)

Expand Down Expand Up @@ -39,6 +40,19 @@ def _extract_from_user_messages(self, data: dict) -> set[str]:
filenames.extend(extracted_snippets.keys())
return set(filenames)

def _extract_from_list_user_messages(self, data: dict) -> set[str]:
    """
    Extract the filenames of the code snippets found in list-style user messages.

    Some clients send the `content` of a message as a list of parts, e.g.
    `[{"type": "text", "text": "..."}]`. Every text part of every message with role
    "user" is passed to the snippet extractor and the filenames of the snippets it
    finds are collected.

    A plain-string `content` is handled as a single text part. Content that is neither
    a string nor a list, parts that are not dicts, parts whose type is not "text" and
    parts without a string `text` are ignored instead of raising.

    It returns a set of filenames extracted from the code snippets.
    """
    filenames: set[str] = set()
    for msg in data.get("messages", []):
        if msg.get("role", "") != "user":
            continue

        content = msg.get("content", [])
        if isinstance(content, str):
            # Normalise the plain-string form so both shapes share one code path.
            content = [{"type": "text", "text": content}]
        elif not isinstance(content, list):
            # e.g. an explicit null content: nothing to scan in this message.
            continue

        for part in content:
            if not isinstance(part, dict) or part.get("type", "") != "text":
                continue
            text = part.get("text")
            if not isinstance(text, str):
                # Never hand a missing/None text to the snippet extractor.
                continue
            extracted_snippets = self._snippet_extractor.extract_unique_snippets(text)
            filenames.update(extracted_snippets.keys())
    return filenames

@abstractmethod
def extract_unique_filenames(self, data: dict) -> set[str]:
"""
Expand Down Expand Up @@ -70,27 +84,8 @@ class ClineBodySnippetExtractor(BodyCodeSnippetExtractor):
def __init__(self):
self._snippet_extractor = ClineCodeSnippetExtractor()

def _extract_from_user_messages(self, data: dict) -> set[str]:
"""
The method extracts the code snippets from the user messages in the data got from Cline.

It returns a set of filenames extracted from the code snippets.
"""

filenames: List[str] = []
for msg in data.get("messages", []):
if msg.get("role", "") == "user":
msgs_content = msg.get("content", [])
for msg_content in msgs_content:
if msg_content.get("type", "") == "text":
extracted_snippets = self._snippet_extractor.extract_unique_snippets(
msg_content.get("text")
)
filenames.extend(extracted_snippets.keys())
return set(filenames)

def extract_unique_filenames(self, data: dict) -> set[str]:
return self._extract_from_user_messages(data)
return self._extract_from_list_user_messages(data)


class OpenInterpreterBodySnippetExtractor(BodyCodeSnippetExtractor):
Expand Down Expand Up @@ -136,3 +131,12 @@ def extract_unique_filenames(self, data: dict) -> set[str]:
)
filenames.extend(extracted_snippets.keys())
return set(filenames)


class KoduBodySnippetExtractor(BodyCodeSnippetExtractor):
    """
    Body snippet extractor for requests coming from Kodu.

    Snippets are located with a `KoduCodeSnippetExtractor` and the filenames are
    gathered through the inherited list-based user message helper.
    """

    def __init__(self):
        kodu_extractor = KoduCodeSnippetExtractor()
        self._snippet_extractor = kodu_extractor

    def extract_unique_filenames(self, data: dict) -> set[str]:
        """Return the unique snippet filenames found in the user messages of `data`."""
        unique_filenames = self._extract_from_list_user_messages(data)
        return unique_filenames
4 changes: 4 additions & 0 deletions src/codegate/extract_snippets/factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,15 @@
BodyCodeSnippetExtractor,
ClineBodySnippetExtractor,
ContinueBodySnippetExtractor,
KoduBodySnippetExtractor,
OpenInterpreterBodySnippetExtractor,
)
from codegate.extract_snippets.message_extractor import (
AiderCodeSnippetExtractor,
ClineCodeSnippetExtractor,
CodeSnippetExtractor,
DefaultCodeSnippetExtractor,
KoduCodeSnippetExtractor,
OpenInterpreterCodeSnippetExtractor,
)

Expand All @@ -24,6 +26,7 @@ def create_snippet_extractor(detected_client: ClientType) -> BodyCodeSnippetExtr
ClientType.CLINE: ClineBodySnippetExtractor(),
ClientType.AIDER: AiderBodySnippetExtractor(),
ClientType.OPEN_INTERPRETER: OpenInterpreterBodySnippetExtractor(),
ClientType.KODU: KoduBodySnippetExtractor(),
}
return mapping_client_extractor.get(detected_client, ContinueBodySnippetExtractor())

Expand All @@ -37,5 +40,6 @@ def create_snippet_extractor(detected_client: ClientType) -> CodeSnippetExtracto
ClientType.CLINE: ClineCodeSnippetExtractor(),
ClientType.AIDER: AiderCodeSnippetExtractor(),
ClientType.OPEN_INTERPRETER: OpenInterpreterCodeSnippetExtractor(),
ClientType.KODU: KoduCodeSnippetExtractor(),
}
return mapping_client_extractor.get(detected_client, DefaultCodeSnippetExtractor())
25 changes: 25 additions & 0 deletions src/codegate/extract_snippets/message_extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,13 @@
re.DOTALL,
)

# Pattern for files embedded in a message as `<file path="NAME">CONTENT</file>`.
# - `filename` captures the value of the `path` attribute: one or more characters
#   that are neither a newline nor `>`.
# - `content` captures everything up to the first closing `</file>` tag; because of
#   `re.DOTALL` the `.` also matches newlines, so the content may span several lines.
# NOTE(review): the `filename` class does not exclude `"`, so an opening tag carrying
# extra attributes after `path` would be captured whole as the filename - confirm the
# client never emits such tags.
KODU_CONTENT_PATTERN = re.compile(
    r"<file\s+path=\"(?P<filename>[^\n>]+)\">" # Match the opening tag with path attribute
    r"(?P<content>.*?)" # Match the content (non-greedy)
    r"</file>", # Match the closing tag
    re.DOTALL,
)


class MatchedPatternSnippet(BaseModel):
"""
Expand Down Expand Up @@ -343,3 +350,21 @@ def _get_match_pattern_snippet(self, match: re.Match) -> MatchedPatternSnippet:
filename = match.group("filename")
content = match.group("content")
return MatchedPatternSnippet(language=matched_language, filename=filename, content=content)


class KoduCodeSnippetExtractor(CodeSnippetExtractor):
    """
    Snippet extractor for messages sent by Kodu.

    The same pattern, `KODU_CONTENT_PATTERN`, is used both for plain code blocks and
    for code blocks with a filename.
    """

    @property
    def codeblock_pattern(self) -> list[re.Pattern]:
        """List of patterns used to find code blocks."""
        return [KODU_CONTENT_PATTERN]

    @property
    def codeblock_with_filename_pattern(self) -> list[re.Pattern]:
        """List of patterns used to find code blocks that carry a filename."""
        return [KODU_CONTENT_PATTERN]

    def _get_match_pattern_snippet(self, match: re.Match) -> MatchedPatternSnippet:
        """Build the matched snippet from the `filename` and `content` groups of `match`."""
        # We don't have language in the kodu pattern
        matched_language = None
        filename = match.group("filename")
        content = match.group("content")
        return MatchedPatternSnippet(language=matched_language, filename=filename, content=content)
70 changes: 70 additions & 0 deletions tests/extract_snippets/test_body_extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from codegate.extract_snippets.body_extractor import (
ClineBodySnippetExtractor,
ContinueBodySnippetExtractor,
KoduBodySnippetExtractor,
OpenInterpreterBodySnippetExtractor,
)

Expand Down Expand Up @@ -213,3 +214,72 @@ def test_body_extract_continue_snippets(test_case: BodyCodeSnippetTest):
extractor = ContinueBodySnippetExtractor()
filenames = extractor.extract_unique_filenames(test_case.input_body_dict)
_evaluate_actual_filenames(filenames, test_case)


@pytest.mark.parametrize(
    "test_case",
    [
        # Analyze processed snippets from Kodu
        BodyCodeSnippetTest(
            input_body_dict={
                "messages": [
                    {"role": "system", "content": "You are Kodu, an autonomous coding agent."},
                    {
                        "role": "user",
                        # Both text parts belong to the content list of the user message.
                        # The second one used to sit at `messages` level without a role,
                        # so it was silently skipped and never exercised by the test.
                        "content": [
                            {
                                "type": "text",
                                "text": """
Here is our task for this conversation, you must remember it all time unless i tell you otherwise.
<task>
please analyze
<additional-context>
- Super critical information, the files attached here are part of the task and need to be
- The URLs attached here need to be scrapped and the information should be used for the
- The files passed in context are provided to help you understand the task better, the
<files count="1"><file path="testing_file.py">import invokehttp
import fastapi
from fastapi import FastAPI, Request, Response, HTTPException
import numpy

GITHUB_TOKEN="ghp_1J9Z3Z2dfg4dfs23dsfsdf232aadfasdfasfasdf32"

def add(a, b):
return a + b

def multiply(a, b):
return a * b



def substract(a, b):
</file></files>
<urls></urls>
</additional-context>

</task>
""",
                            },
                            {
                                "type": "text",
                                "text": """
You must use a tool to proceed. Either use attempt_completion if you've completed the task,
or ask_followup_question if you need more information. you must adhere to the tool format
<kodu_action><tool_name><parameter1_name>value1</parameter1_name><parameter2_name>value2
</parameter2_name>... additional parameters as needed in the same format
...</tool_name></kodu_action>
""",
                            },
                        ],
                    },
                ]
            },
            expected_count=1,
            expected=["testing_file.py"],
        ),
    ],
)
def test_body_extract_kodu_snippets(test_case: BodyCodeSnippetTest):
    """Check the filenames the Kodu body extractor finds in the request body."""
    extractor = KoduBodySnippetExtractor()
    filenames = extractor.extract_unique_filenames(test_case.input_body_dict)
    _evaluate_actual_filenames(filenames, test_case)
54 changes: 54 additions & 0 deletions tests/extract_snippets/test_message_extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
ClineCodeSnippetExtractor,
CodeSnippet,
DefaultCodeSnippetExtractor,
KoduCodeSnippetExtractor,
OpenInterpreterCodeSnippetExtractor,
)

Expand Down Expand Up @@ -714,6 +715,59 @@ def test_extract_openinterpreter_snippets(test_case: CodeSnippetTest):
_evaluate_actual_snippets(snippets, test_case)


@pytest.mark.parametrize(
    "test_case",
    [
        # Analyze processed snippets from Kodu
        CodeSnippetTest(
            input_message="""
Here is our task for this conversation, you must remember it all time unless i tell you otherwise.
<task>
please analyze
<additional-context>
- Super critical information, the files attached here are part of the task and need to be
- The URLs attached here need to be scrapped and the information should be used for the
- The files passed in context are provided to help you understand the task better, the
<files count="1"><file path="testing_file.py">import invokehttp
import fastapi
from fastapi import FastAPI, Request, Response, HTTPException
import numpy

GITHUB_TOKEN="ghp_1J9Z3Z2dfg4dfs23dsfsdf232aadfasdfasfasdf32"

def add(a, b):
return a + b

def multiply(a, b):
return a * b



def substract(a, b):
</file></files>
<urls></urls>
</additional-context>

</task>
""",
            expected_count=1,
            expected=[
                CodeSnippet(
                    language="python",
                    filepath="testing_file.py",
                    code="def multiply(a, b):",
                    file_extension=".py",
                ),
            ],
        ),
    ],
)
def test_extract_kodu_snippets(test_case: CodeSnippetTest):
    """Check the snippets the Kodu extractor returns when a filepath is required."""
    extractor = KoduCodeSnippetExtractor()
    snippets = extractor.extract_snippets(test_case.input_message, require_filepath=True)
    _evaluate_actual_snippets(snippets, test_case)


@pytest.mark.parametrize(
"filepath,expected",
[
Expand Down