diff --git a/.github/workflows/integration-tests.yml b/.github/workflows/integration-tests.yml index 8900e4f4..fa1d56b6 100644 --- a/.github/workflows/integration-tests.yml +++ b/.github/workflows/integration-tests.yml @@ -145,7 +145,7 @@ jobs: function check_model_ready() { response=$(curl -s http://localhost:11434/api/generate -d '{ "model": "qwen2.5-coder:0.5b", - "prompt": "Why is the sky blue?", + "prompt": "Hello", "stream": false }' 2>&1) @@ -246,7 +246,7 @@ jobs: echo -e "\nVerify the completions endpoint works\n" curl http://localhost:8000/v1/completions -H "Content-Type: application/json" -d '{ "model": "Qwen/Qwen2.5-Coder-0.5B-Instruct", - "prompt": ["How to make pizza"], + "prompt": ["Hello"], "max_tokens": 100, "temperature": 0 }' @@ -269,6 +269,7 @@ jobs: docker logs vllm - name: Tests - ${{ matrix.test-provider }} + timeout-minutes: 15 env: CODEGATE_PROVIDERS: ${{ matrix.test-provider }} run: | diff --git a/tests/integration/anthropic/testcases.yaml b/tests/integration/anthropic/testcases.yaml index 72417b91..5c5bd326 100644 --- a/tests/integration/anthropic/testcases.yaml +++ b/tests/integration/anthropic/testcases.yaml @@ -50,7 +50,7 @@ testcases: "content": [ { "type": "text", - "text": "You are a HOLE FILLER. You are provided with a file containing holes, formatted as '{{HOLE_NAME}}'. Your TASK is to complete with a string to replace this hole with, inside a XML tag, including context-aware indentation, if needed. All completions MUST be truthful, accurate, well-written and correct.\n\n## EXAMPLE QUERY:\n\n\nfunction sum_evens(lim) {\n var sum = 0;\n for (var i = 0; i < lim; ++i) {\n {{FILL_HERE}}\n }\n return sum;\n}\n\n\nTASK: Fill the {{FILL_HERE}} hole.\n\n## CORRECT COMPLETION\n\nif (i % 2 === 0) {\n sum += i;\n }\n\n## EXAMPLE QUERY:\n\n\ndef sum_list(lst):\n total = 0\n for x in lst:\n {{FILL_HERE}}\n return total\n\nprint sum_list([1, 2, 3])\n\n\n## CORRECT COMPLETION:\n\n total += x\n\n## EXAMPLE QUERY:\n\n\n// data Tree a = Node (Tree a) (Tree a) | Leaf a\n\n// sum :: Tree Int -> Int\n// sum (Node lft rgt) = sum lft + sum rgt\n// sum (Leaf val) = val\n\n// convert to TypeScript:\n{{FILL_HERE}}\n\n\n## CORRECT COMPLETION:\n\ntype Tree\n = {$:\"Node\", lft: Tree, rgt: Tree}\n | {$:\"Leaf\", val: T};\n\nfunction sum(tree: Tree): number {\n switch (tree.$) {\n case \"Node\":\n return sum(tree.lft) + sum(tree.rgt);\n case \"Leaf\":\n return tree.val;\n }\n}\n\n## EXAMPLE QUERY:\n\nThe 5th {{FILL_HERE}} is Jupiter.\n\n## CORRECT COMPLETION:\n\nplanet from the Sun\n\n## EXAMPLE QUERY:\n\nfunction hypothenuse(a, b) {\n return Math.sqrt({{FILL_HERE}}b ** 2);\n}\n\n## CORRECT COMPLETION:\n\na ** 2 + \n\n\n# Path: Untitled.txt\n# http://127.0.0.1:8989/vllm/completions\n# codegate/test.py\nimport requests\n\ndef call_api():\n {{FILL_HERE}}\n\n\ndata = {'key1': 'test1', 'key2': 'test2'}\nresponse = call_api('http://localhost:8080', method='post', data='data')\n\nTASK: Fill the {{FILL_HERE}} hole. Answer only with the CORRECT completion, and NOTHING ELSE. Do it now.\n" + "text": "You are a HOLE FILLER. You are provided with a file containing holes, formatted as '{{HOLE_NAME}}'. Your TASK is to complete with a string to replace this hole with, inside a XML tag, including context-aware indentation, if needed. All completions MUST be truthful, accurate, well-written and correct.\n\n## EXAMPLE QUERY:\n\n\nfunction sum_evens(lim) {\n var sum = 0;\n for (var i = 0; i < lim; ++i) {\n {{FILL_HERE}}\n }\n return sum;\n}\n\n\nTASK: Fill the {{FILL_HERE}} hole.\n\n## CORRECT COMPLETION\n\nif (i % 2 === 0) {\n sum += i;\n }\n\n## EXAMPLE QUERY:\n\n\ndef sum_list(lst):\n total = 0\n for x in lst:\n {{FILL_HERE}}\n return total\n\nprint sum_list([1, 2, 3])\n\n\n## CORRECT COMPLETION:\n\n total += x\n\n## EXAMPLE QUERY:\n\n\n// data Tree a = Node (Tree a) (Tree a) | Leaf a\n\n// sum :: Tree Int -> Int\n// sum (Node lft rgt) = sum lft + sum rgt\n// sum (Leaf val) = val\n\n// convert to TypeScript:\n{{FILL_HERE}}\n\n\n## CORRECT COMPLETION:\n\ntype Tree\n = {$:\"Node\", lft: Tree, rgt: Tree}\n | {$:\"Leaf\", val: T};\n\nfunction sum(tree: Tree): number {\n switch (tree.$) {\n case \"Node\":\n return sum(tree.lft) + sum(tree.rgt);\n case \"Leaf\":\n return tree.val;\n }\n}\n\n## EXAMPLE QUERY:\n\nThe 5th {{FILL_HERE}} is Jupiter.\n\n## CORRECT COMPLETION:\n\nplanet from the Sun\n\n## EXAMPLE QUERY:\n\nfunction hypothenuse(a, b) {\n return Math.sqrt({{FILL_HERE}}b ** 2);\n}\n\n## CORRECT COMPLETION:\n\na ** 2 + \n\n\n\ndef print_hello():\n {{FILL_HERE}}\n\n\nprint_hello()\n\nTASK: Fill the {{FILL_HERE}} hole. Answer only with the CORRECT completion, and NOTHING ELSE. Do it now.\n" } ] } @@ -58,13 +58,7 @@ testcases: "system": "" } likes: | - def call_api(url, method='get', data=None): - if method.lower() == 'get': - return requests.get(url) - elif method.lower() == 'post': - return requests.post(url, json=data) - else: - raise ValueError("Unsupported HTTP method") + print("Hello, World!") anthropic_malicious_package_question: name: Anthropic Malicious Package diff --git a/tests/integration/ollama/testcases.yaml b/tests/integration/ollama/testcases.yaml index 07a4cf5c..4558aabf 100644 --- a/tests/integration/ollama/testcases.yaml +++ b/tests/integration/ollama/testcases.yaml @@ -37,7 +37,7 @@ testcases: "model": "qwen2.5-coder:0.5b", "raw": true, "options": { - "temperature": 0.01, + "temperature": 0, "num_predict": 4096, "stop": [ "<|endoftext|>", @@ -55,13 +55,10 @@ testcases: ], "num_ctx": 8096 }, - "prompt":"<|fim_prefix|>\n# codegate/test.py\nimport invokehttp\nimport requests\n\nkey = \"mysecret-key\"\n\ndef call_api():\n <|fim_suffix|>\n\n\ndata = {'key1': 'test1', 'key2': 'test2'}\nresponse = call_api('http://localhost:8080', method='post', data='data')\n<|fim_middle|>" + "prompt":"# ***Do not add code comments!***\n***Do not add anything else besides the body of the function!***\n<|fim_prefix|>def print_hello_world():\n <|fim_suffix|>\n\n\nprint_hello_world()\n<|fim_middle|>" } likes: | - ```python - if __name__ == '__main__': - invokehttp.run(call_api) - ``` + print("Hello, World!") ollama_malicious_package_question: name: Ollama Malicious Package diff --git a/tests/integration/vllm/testcases.yaml b/tests/integration/vllm/testcases.yaml index 32284764..48e2bf6e 100644 --- a/tests/integration/vllm/testcases.yaml +++ b/tests/integration/vllm/testcases.yaml @@ -51,17 +51,10 @@ testcases: "#- coding: utf-8", "```" ], - "prompt":"<|fim_prefix|>\n# codegate/test.py\nimport invokehttp\nimport requests\n\nkey = \"mysecret-key\"\n\ndef call_api():\n <|fim_suffix|>\n\n\ndata = {'key1': 'test1', 'key2': 'test2'}\nresponse = call_api('http://localhost:8080', method='post', data='data')\n<|fim_middle|>" + "prompt":"# Do not add comments\n<|fim_prefix|>\n# codegate/greet.py\ndef print_hello():\n <|fim_suffix|>\n\n\nprint_hello()\n<|fim_middle|>" } likes: | - return response.json() - - def test_call_api(): - response = call_api('http://localhost:8080', method='post', data='data') - assert response['key1'] == 'test1' and response['key2'] == 'test2', "Test failed" - - if __name__ == '__main__': - test_call_api() + print("Hello, World!") vllm_malicious_package_question: name: VLLM Malicious Package