Add tests

nik · nikitabelonogov · commit 3dc6792ed0a7 · 2024-10-02T13:56:34.000+04:00
diff --git a/src/label_studio_sdk/_extensions/label_studio_tools/core/utils/json_schema.py b/src/label_studio_sdk/_extensions/label_studio_tools/core/utils/json_schema.py
@@ -1,6 +1,8 @@
 import json
 import types
 import sys
+import functools
+from typing import Type, Dict, Any, Tuple, Generator
 from pathlib import Path
 from tempfile import TemporaryDirectory
 from datamodel_code_generator import InputFileType, generate, DataModelType, LiteralType
@@ -9,8 +11,25 @@
 from contextlib import contextmanager
 
 
+@functools.lru_cache(maxsize=128)
+def _generate_model_code(json_schema_str: str, class_name: str = 'MyModel') -> str:
+    with TemporaryDirectory() as temp_dir:
+        temp_file = Path(temp_dir) / "schema.py"
+        
+        generate(
+            json_schema_str,
+            input_file_type=InputFileType.JsonSchema,
+            input_filename="schema.json",
+            output=temp_file,
+            output_model_type=DataModelType.PydanticV2BaseModel,
+            enum_field_as_literal=LiteralType.All,
+            class_name=class_name
+        )
+        
+        return temp_file.read_text()
+
 @contextmanager
-def json_schema_to_pydantic(json_schema: dict, class_name: str = 'MyModel') -> type[BaseModel]:
+def json_schema_to_pydantic(json_schema: dict, class_name: str = 'MyModel') -> Generator[Type[BaseModel], None, None]:
     """
     Convert a JSON schema to a Pydantic model and provide it as a context manager.
 
@@ -36,31 +55,26 @@ def json_schema_to_pydantic(json_schema: dict, class_name: str = 'MyModel') -> t
             print(instance.model_dump())
         ```
     """
+    # Serialize the schema dict to a JSON string; a string is hashable, so it can key the lru_cache on _generate_model_code
     json_schema_str = json.dumps(json_schema)
-
-    with TemporaryDirectory() as temp_dir:
-        temp_file = Path(temp_dir) / "schema.py"
-        
-        generate(
-            json_schema_str,
-            input_file_type=InputFileType.JsonSchema,
-            input_filename="schema.json",
-            output=temp_file,
-            output_model_type=DataModelType.PydanticV2BaseModel,
-            enum_field_as_literal=LiteralType.All,
-            class_name=class_name
-        )
-        
-        model_code = temp_file.read_text()
-
-    mod = types.ModuleType('dynamic_module')
+    
+    # Generate Pydantic model code from the JSON schema string
+    model_code: str = _generate_model_code(json_schema_str, class_name)
+    
+    # Derive the module name from id(json_schema_str): ids are unique among live objects, so overlapping calls in one process get distinct names
+    module_name = f'dynamic_module_{id(json_schema_str)}'
+    
+    # Create a new module object with the unique name and execute the generated model code in the context of the new module
+    mod = types.ModuleType(module_name)
     exec(model_code, mod.__dict__)
-
     model_class = getattr(mod, class_name)
     
     try:
-        sys.modules['dynamic_module'] = mod
+        # Add the new module to sys.modules to make it importable
+        # This is necessary to avoid Pydantic errors related to undefined models
+        sys.modules[module_name] = mod
         yield model_class
     finally:
-        if 'dynamic_module' in sys.modules:
-            del sys.modules['dynamic_module']
+        if module_name in sys.modules:
+            del sys.modules[module_name]
+        
diff --git a/src/label_studio_sdk/label_interface/interface.py b/src/label_studio_sdk/label_interface/interface.py
@@ -321,7 +321,7 @@ def create_regions(self, data: Dict[str, Union[str, Dict, List[str], List[Dict]]
             # 2. we should be less open regarding the payload type and defining the strict typing elsewhere,
             # but likely that requires rewriting of how ControlTag.label() is working now
             if isinstance(payload, str):
-                payload = {'label': payload}
+                payload = {'label': payload, 'text': [payload]}
             elif isinstance(payload, list):
                 if len(payload) > 0:
                     if isinstance(payload[0], str):
diff --git a/tests/custom/test_interface/test_json_schema.py b/tests/custom/test_interface/test_json_schema.py
@@ -1,4 +1,5 @@
 import pytest
+import json
 from datetime import datetime, timezone
 from label_studio_sdk.label_interface.interface import LabelInterface
 from label_studio_sdk.label_interface.control_tags import ControlTag
@@ -200,7 +201,119 @@ def test_to_json_schema(config, expected_json_schema, input_arg, expected_result
     json_schema = interface.to_json_schema()
     assert json_schema == expected_json_schema
 
-    # convert JSON Schema to Pydantic
     with json_schema_to_pydantic(json_schema) as ResponseModel:
         instance = ResponseModel(**input_arg)
-        assert instance.model_dump() == expected_result
+        assert instance.model_dump() == expected_result
+
+
+
+def process_json_schema(json_schema, input_arg, queue):
+    with json_schema_to_pydantic(json_schema) as ResponseModel:
+        instance = ResponseModel(**input_arg)
+        queue.put(instance.model_dump())
+
+def test_concurrent_json_schema_to_pydantic():
+    import multiprocessing
+    json_schema = {
+        "type": "object",
+        "properties": {
+            "sentiment": {
+                "type": "string",
+                "description": "Choices for doc",
+                "enum": ["Positive", "Negative", "Neutral"],
+            }
+        },
+        "required": ["sentiment"]
+    }
+    input_arg1 = {"sentiment": "Positive"}
+    input_arg2 = {"sentiment": "Negative"}
+    
+    queue = multiprocessing.Queue()
+    
+    p1 = multiprocessing.Process(target=process_json_schema, args=(json_schema, input_arg1, queue))
+    p2 = multiprocessing.Process(target=process_json_schema, args=(json_schema, input_arg2, queue))
+    
+    p1.start()
+    p2.start()
+    
+    p1.join()
+    p2.join()
+    
+    results = [queue.get() for _ in range(2)]
+    
+    assert {"sentiment": "Positive"} in results
+    assert {"sentiment": "Negative"} in results
+    assert len(results) == 2
+
+
+def process_json_schema_threaded(json_schema, input_arg, results, index):
+    with json_schema_to_pydantic(json_schema) as ResponseModel:
+        instance = ResponseModel(**input_arg)
+        results[index] = instance.model_dump()
+
+def test_concurrent_json_schema_to_pydantic_threaded():
+    import threading
+    import time
+    
+    json_schema = {
+        "type": "object",
+        "properties": {
+            "sentiment": {
+                "type": "string",
+                "description": "Choices for doc",
+                "enum": ["Positive", "Negative", "Neutral"],
+            }
+        },
+        "required": ["sentiment"]
+    }
+    input_args = [
+        {"sentiment": "Positive"},
+        {"sentiment": "Negative"},
+        {"sentiment": "Neutral"},
+        {"sentiment": "Positive"}
+    ]
+    
+    results = [None] * len(input_args)
+    threads = []
+
+    # Create and start threads
+    for i, input_arg in enumerate(input_args):
+        thread = threading.Thread(target=process_json_schema_threaded, args=(json_schema, input_arg, results, i))
+        threads.append(thread)
+        thread.start()
+
+    # Wait for all threads to complete
+    for thread in threads:
+        thread.join()
+
+    # Verify results
+    assert {"sentiment": "Positive"} in results
+    assert {"sentiment": "Negative"} in results
+    assert {"sentiment": "Neutral"} in results
+    assert results.count({"sentiment": "Positive"}) == 2
+    assert len(results) == 4
+    assert None not in results
+
+    # Repeat the threaded run 10 times to surface intermittent thread-safety failures
+    for _ in range(10):
+        results = [None] * len(input_args)
+        threads = []
+
+        start_time = time.time()
+        for i, input_arg in enumerate(input_args):
+            thread = threading.Thread(target=process_json_schema_threaded, args=(json_schema, input_arg, results, i))
+            threads.append(thread)
+            thread.start()
+
+        for thread in threads:
+            thread.join()
+
+        end_time = time.time()
+
+        assert set(result["sentiment"] for result in results) == set(["Positive", "Negative", "Neutral"])
+        assert results.count({"sentiment": "Positive"}) == 2
+        assert len(results) == 4
+        assert None not in results
+
+        # Each round of 4 threads must finish within 1 second of wall-clock time (NOTE(review): may be flaky on slow CI; adjust as needed)
+        assert end_time - start_time < 1.0, f"Execution took too long: {end_time - start_time} seconds"