datafold
diff --git a/‎data_diff/databases/__init__.py
Lines changed: 1 addition & 0 deletions b/‎data_diff/databases/__init__.py
Lines changed: 1 addition & 0 deletions
diff --git a/‎data_diff/databases/_connect.py
Lines changed: 2 additions & 0 deletions b/‎data_diff/databases/_connect.py
Lines changed: 2 additions & 0 deletions
diff --git a/‎data_diff/databases/mssql.py
Lines changed: 10 additions & 0 deletions b/‎data_diff/databases/mssql.py
Lines changed: 10 additions & 0 deletions
diff --git a/‎data_diff/joindiff_tables.py
Lines changed: 10 additions & 3 deletions b/‎data_diff/joindiff_tables.py
Lines changed: 10 additions & 3 deletions
diff --git a/‎data_diff/sqeleton/abcs/database_types.py
Lines changed: 9 additions & 1 deletion b/‎data_diff/sqeleton/abcs/database_types.py
Lines changed: 9 additions & 1 deletion
diff --git a/‎data_diff/sqeleton/databases/__init__.py
Lines changed: 1 addition & 0 deletions b/‎data_diff/sqeleton/databases/__init__.py
Lines changed: 1 addition & 0 deletions
diff --git a/‎data_diff/sqeleton/databases/_connect.py
Lines changed: 2 additions & 0 deletions b/‎data_diff/sqeleton/databases/_connect.py
Lines changed: 2 additions & 0 deletions
diff --git a/‎data_diff/sqeleton/databases/base.py
Lines changed: 15 additions & 3 deletions b/‎data_diff/sqeleton/databases/base.py
Lines changed: 15 additions & 3 deletions
@@ -12,5 +12,6 @@
 from .clickhouse import Clickhouse
 from .vertica import Vertica
 from .duckdb import DuckDB
+from .mssql import MsSql
 
 from ._connect import connect
@@ -14,6 +14,7 @@
 from .clickhouse import Clickhouse
 from .vertica import Vertica
 from .duckdb import DuckDB
+from .mssql import MsSql
 
 
 DATABASE_BY_SCHEME = {
@@ -29,6 +30,7 @@
     "trino": Trino,
     "clickhouse": Clickhouse,
     "vertica": Vertica,
+    "mssql": MsSql
 }
 
 
 
@@ -0,0 +1,10 @@
+from data_diff.sqeleton.databases import mssql
+from .base import DatadiffDialect
+
+
+class Dialect(mssql.Dialect, mssql.Mixin_MD5, mssql.Mixin_NormalizeValue, DatadiffDialect):
+    pass  # no overrides: everything comes from the sqeleton mssql dialect, its two mixins, and DatadiffDialect
+
+
+class MsSql(mssql.MsSQL):
+    dialect = Dialect()  # class-level instance of the data-diff Dialect defined in this module
@@ -10,7 +10,7 @@
 
 from runtype import dataclass
 
-from data_diff.sqeleton.databases import Database, MySQL, BigQuery, Presto, Oracle, Snowflake, DbPath
+from data_diff.sqeleton.databases import Database, MsSQL, MySQL, BigQuery, Presto, Oracle, Snowflake, DbPath
 from data_diff.sqeleton.abcs import NumericType
 from data_diff.sqeleton.queries import (
     table,
@@ -25,9 +25,10 @@
     leftjoin,
     rightjoin,
     this,
+    when,
     Compiler,
 )
-from data_diff.sqeleton.queries.ast_classes import Concat, Count, Expr, Random, TablePath, Code, ITable
+from data_diff.sqeleton.queries.ast_classes import Concat, Count, Expr, Func, Random, TablePath, Code, ITable
 from data_diff.sqeleton.queries.extras import NormalizeAsString
 
 from .info_tree import InfoTree
@@ -82,6 +83,12 @@ def _outerjoin(db: Database, a: ITable, b: ITable, keys1: List[str], keys2: List
 
     is_exclusive_a = and_(b[k] == None for k in keys2)
     is_exclusive_b = and_(a[k] == None for k in keys1)
+
+    if isinstance(db, MsSQL):
+        # MsSQL accepts "IS NULL" / "ISNULL()" only as conditions, not as value expressions, so map the condition to 1/0.
+        is_exclusive_a = when(is_exclusive_a).then(1).else_(0)
+        is_exclusive_b = when(is_exclusive_b).then(1).else_(0)
+
     if isinstance(db, Oracle):
         is_exclusive_a = bool_to_int(is_exclusive_a)
         is_exclusive_b = bool_to_int(is_exclusive_b)
@@ -342,7 +349,7 @@ def _count_diff_per_column(self, db, diff_rows, cols, is_diff_cols):
         self.stats["diff_counts"] = diff_counts
 
     def _sample_and_count_exclusive(self, db, diff_rows, a_cols, b_cols):
-        if isinstance(db, Oracle):
+        if isinstance(db, (Oracle, MsSQL)):
             exclusive_rows_query = diff_rows.where((this.is_exclusive_a == 1) | (this.is_exclusive_b == 1))
         else:
             exclusive_rows_query = diff_rows.where(this.is_exclusive_a | this.is_exclusive_b)
 
@@ -216,7 +216,15 @@ def current_timestamp(self) -> str:
         "Provide SQL for returning the current timestamp, aka now"
 
     @abstractmethod
-    def offset_limit(self, offset: Optional[int] = None, limit: Optional[int] = None):
+    def current_database(self) -> str:
+        "Provide SQL for returning the current default database."
+
+    @abstractmethod
+    def current_schema(self) -> str:
+        "Provide SQL for returning the current default schema."
+
+    @abstractmethod
+    def offset_limit(self, offset: Optional[int] = None, limit: Optional[int] = None, has_order_by: Optional[bool] = None) -> str:
         "Provide SQL fragment for limit and offset inside a select"
 
     @abstractmethod
 
@@ -14,5 +14,6 @@
 from .clickhouse import Clickhouse
 from .vertica import Vertica
 from .duckdb import DuckDB
+from .mssql import MsSQL
 
 connect = Connect()
@@ -21,6 +21,7 @@
 from .clickhouse import Clickhouse
 from .vertica import Vertica
 from .duckdb import DuckDB
+from .mssql import MsSQL
 
 
 @dataclass
@@ -86,6 +87,7 @@ def match_path(self, dsn):
     "trino": Trino,
     "clickhouse": Clickhouse,
     "vertica": Vertica,
+    "mssql": MsSQL
 }
 
 
 
@@ -155,7 +155,7 @@ class BaseDialect(AbstractDialect):
 
     PLACEHOLDER_TABLE = None  # Used for Oracle
 
-    def offset_limit(self, offset: Optional[int] = None, limit: Optional[int] = None):
+    def offset_limit(self, offset: Optional[int] = None, limit: Optional[int] = None, has_order_by: Optional[bool] = None) -> str:
         if offset:
             raise NotImplementedError("No support for OFFSET in query")
 
@@ -182,6 +182,12 @@ def random(self) -> str:
     def current_timestamp(self) -> str:
         return "current_timestamp()"
 
+    def current_database(self) -> str:
+        return "current_database()"  # SQL expression text to embed in a query, not the database name itself
+
+    def current_schema(self) -> str:
+        return "current_schema()"  # SQL expression text to embed in a query, not the schema name itself
+
     def explain_as_text(self, query: str) -> str:
         return f"EXPLAIN {query}"
 
@@ -518,7 +524,12 @@ def _query_cursor(self, c, sql_code: str) -> QueryResult:
             c.execute(sql_code)
             if sql_code.lower().startswith(("select", "explain", "show")):
                 columns = [col[0] for col in c.description]
-                return QueryResult(c.fetchall(), columns)
+
+                # TODO/FIXME: rows returned as pyodbc.Row seem to cause a pydantic error:
+                # "[ConstantTable] Attribute 'rows' expected value of type Sequence[Sequence[Any]]"
+                fetched = c.fetchall()
+                result = QueryResult(fetched, columns)
+                return result
         except Exception as _e:
             # logger.exception(e)
             # logger.error(f'Caused by SQL: {sql_code}')
@@ -590,7 +601,8 @@ def is_autocommit(self) -> bool:
         return False
 
 
-CHECKSUM_HEXDIGITS = 15  # Must be 15 or lower, otherwise SUM() overflows
+# TODO: lowered from 15 to 14 because mssql's md5_as_int currently requires a smaller value
+CHECKSUM_HEXDIGITS = 14  # Must be 15 or lower, otherwise SUM() overflows
 MD5_HEXDIGITS = 32
 
 _CHECKSUM_BITSIZE = CHECKSUM_HEXDIGITS << 2