Skip to content

Commit 18cd5c2

Browse files
committed
Make use of binary buffers to speed up data transfer
Signed-off-by: martinRenou <[email protected]>
1 parent dc50201 commit 18cd5c2

22 files changed

+679
-798
lines changed

ipydatagrid/datagrid.py

Lines changed: 74 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99

1010
import numpy as np
1111
import pandas as pd
12+
from bqplot.traits import array_from_json, array_to_json
1213
from ipywidgets import CallbackDispatcher, DOMWidget, widget_serialization
1314
from traitlets import (
1415
Bool,
@@ -76,7 +77,6 @@ def _cell_in_rect(cell, rect):
7677

7778

7879
class SelectionHelper:
79-
8080
"""A Helper Class for processing selections. Provides an iterator
8181
to traverse selected cells.
8282
"""
@@ -164,14 +164,13 @@ def _get_num_rows(self):
164164
return self._num_rows
165165

166166

167-
# modified from ipywidgets original
168-
def _data_to_json(x):
167+
def _data_to_json(x, _):
169168
if isinstance(x, dict):
170-
return {str(k): _data_to_json(v) for k, v in x.items()}
169+
return {str(k): _data_to_json(v, _) for k, v in x.items()}
171170
if isinstance(x, np.ndarray):
172-
return _data_to_json(x.tolist())
171+
return _data_to_json(x.tolist(), _)
173172
if isinstance(x, (list, tuple)):
174-
return [_data_to_json(v) for v in x]
173+
return [_data_to_json(v, _) for v in x]
175174
if isinstance(x, int):
176175
return x
177176
if isinstance(x, float):
@@ -193,9 +192,55 @@ def _data_to_json(x):
193192
return str(x)
194193

195194

195+
def _data_serialization_impl(data, _):
    """Serialize the grid data object column by column for the front end.

    Parameters
    ----------
    data : dict
        Mapping with ``"data"``, ``"schema"`` and ``"fields"`` entries.
        ``data["data"]`` is iterated with ``.items()`` and every value must
        expose ``to_numpy()`` (presumably a pandas DataFrame whose items are
        Series -- confirm against ``generate_data_object``).
    _ :
        Second serializer argument; it is only forwarded to
        ``_data_to_json``.

    Returns
    -------
    dict
        ``{}`` when ``data`` is falsy, otherwise a dict with the serialized
        columns under ``"data"``, the schema passed through unchanged and
        the fields converted with ``_data_to_json``.
    """
    if not data:
        return {}

    columns = {}
    for name, series in data["data"].items():
        key = str(name)
        values = series.to_numpy()

        # Empty columns get an explicit placeholder payload instead of being
        # handed to array_to_json.
        if values.size == 0:
            columns[key] = {
                "value": [],
                "dtype": str(values.dtype),
                "shape": values.shape,
                "type": None,
            }
            continue

        try:
            columns[key] = array_to_json(values)
        except ValueError:
            # Column is most likely heterogeneous, sending the column raw
            columns[key] = {
                "value": _data_to_json(values, _),
                "type": "raw",
            }

    return {
        "data": columns,
        "schema": data["schema"],
        "fields": _data_to_json(data["fields"], _),
    }
224+
225+
226+
def _data_deserialization_impl(data, _): # noqa: U101
227+
if not data:
228+
return {}
229+
230+
deserialized_data = {}
231+
for column, value in data["data"].items():
232+
deserialized_data[column] = array_from_json(value.to_numpy())
233+
234+
return {
235+
"data": deserialized_data,
236+
"schema": data["schema"],
237+
"fields": data["fields"],
238+
}
239+
240+
196241
_data_serialization = {
197-
"from_json": widget_serialization["from_json"],
198-
"to_json": lambda x, _: _data_to_json(x), # noqa: U101
242+
"from_json": _data_deserialization_impl,
243+
"to_json": _data_serialization_impl,
199244
}
200245

201246

@@ -212,7 +257,6 @@ def _widgets_dict_to_json(x, obj):
212257

213258

214259
class DataGrid(DOMWidget):
215-
216260
"""A Grid Widget with filter, sort and selection capabilities.
217261
218262
Attributes
@@ -360,7 +404,7 @@ class DataGrid(DOMWidget):
360404
).tag(sync=True)
361405
selections = List(Dict()).tag(sync=True)
362406
editable = Bool(False).tag(sync=True)
363-
column_widths = Dict({}).tag(sync=True, **_data_serialization)
407+
column_widths = Dict({}).tag(sync=True, to_json=_data_to_json)
364408
grid_style = Dict(allow_none=True).tag(
365409
sync=True, **_widgets_dict_serialization
366410
)
@@ -383,17 +427,15 @@ def __init__(self, dataframe, index_name=None, **kwargs):
383427
def __handle_custom_msg(self, _, content, buffers): # noqa: U101,U100
384428
if content["event_type"] == "cell-changed":
385429
row = content["row"]
386-
column = self._column_index_to_name(
387-
self._data, content["column_index"]
388-
)
430+
column = content["column"]
389431
value = content["value"]
390432
# update data on kernel
391-
self._data["data"][row][column] = value
433+
self._data["data"].loc[row, column] = value
392434
# notify python listeners
393435
self._cell_change_handlers(
394436
{
395437
"row": row,
396-
"column": column,
438+
"column": content["column"],
397439
"column_index": content["column_index"],
398440
"value": value,
399441
}
@@ -414,7 +456,7 @@ def __handle_custom_msg(self, _, content, buffers): # noqa: U101,U100
414456
@property
415457
def data(self):
416458
trimmed_primary_key = self._data["schema"]["primaryKey"][:-1]
417-
if self._data["data"]:
459+
if "data" in self._data:
418460
df = pd.DataFrame(self._data["data"])
419461
else:
420462
df = pd.DataFrame(
@@ -460,7 +502,7 @@ def generate_data_object(dataframe, guid_key="ipydguuid", index_name="key"):
460502

461503
schema = pd.io.json.build_table_schema(dataframe)
462504
reset_index_dataframe = dataframe.reset_index()
463-
data = reset_index_dataframe.to_dict(orient="records")
505+
data = reset_index_dataframe
464506

465507
# Check for multiple primary keys
466508
key = reset_index_dataframe.columns[: dataframe.index.nlevels].tolist()
@@ -522,7 +564,7 @@ def get_cell_value(self, column_name, primary_key_value):
522564
if isinstance(column_name, list):
523565
column_name = tuple(column_name)
524566

525-
return [self._data["data"][row][column_name] for row in row_indices]
567+
return [self._data["data"][column_name][row] for row in row_indices]
526568

527569
def set_cell_value(self, column_name, primary_key_value, new_value):
528570
"""Sets the value for a single cell by column name and primary key.
@@ -541,9 +583,9 @@ def set_cell_value(self, column_name, primary_key_value, new_value):
541583
# Iterate over all indices
542584
outcome = True
543585
for row_index in row_indices:
544-
has_column = column_name in self._data["data"][row_index]
586+
has_column = column_name in self._data["data"]
545587
if has_column and row_index is not None:
546-
self._data["data"][row_index][column_name] = new_value
588+
self._data["data"].loc[row_index, column_name] = new_value
547589
self._notify_cell_change(row_index, column_name, new_value)
548590
else:
549591
outcome = False
@@ -565,7 +607,9 @@ def set_row_value(self, primary_key_value, new_value):
565607
column_index = 0
566608
column = DataGrid._column_index_to_name(self._data, column_index)
567609
while column is not None:
568-
self._data["data"][row_index][column] = new_value[column_index]
610+
self._data["data"].loc[row_index, column] = new_value[
611+
column_index
612+
]
569613

570614
column_index = column_index + 1
571615
column = DataGrid._column_index_to_name(
@@ -577,17 +621,17 @@ def set_row_value(self, primary_key_value, new_value):
577621

578622
def get_cell_value_by_index(self, column_name, row_index):
579623
"""Gets the value for a single cell by column name and row index."""
580-
return self._data["data"][row_index][column_name]
624+
return self._data["data"][column_name][row_index]
581625

582626
def set_cell_value_by_index(self, column_name, row_index, new_value):
583627
"""Sets the value for a single cell by column name and row index.
584628
585629
Note: This method returns a boolean to indicate if the operation
586630
was successful.
587631
"""
588-
has_column = column_name in self._data["data"][row_index]
589-
if has_column and 0 <= row_index < len(self._data["data"]):
590-
self._data["data"][row_index][column_name] = new_value
632+
has_column = column_name in self._data["data"]
633+
if has_column and 0 <= row_index < len(self._data["data"][column_name]):
634+
self._data["data"].loc[row_index, column_name] = new_value
591635
self._notify_cell_change(row_index, column_name, new_value)
592636
return True
593637
return False
@@ -634,7 +678,7 @@ def get_visible_data(self):
634678
"""Returns a dataframe of the current View."""
635679
data = deepcopy(self._data)
636680
if self._visible_rows:
637-
data["data"] = [data["data"][i] for i in self._visible_rows]
681+
data["data"] = data["data"].reindex(self._visible_rows)
638682

639683
at = self._data["schema"]["primaryKey"]
640684
return_df = pd.DataFrame(data["data"]).set_index(at)
@@ -852,9 +896,10 @@ def _get_row_index_of_primary_key(self, value):
852896
"as the primary key."
853897
)
854898

899+
# TODO Is there a better way for this?
855900
row_indices = [
856-
at
857-
for at, row in enumerate(self._data["data"])
901+
idx
902+
for idx, row in self._data["data"].iterrows()
858903
if all(row[key[j]] == value[j] for j in range(len(key)))
859904
]
860905
return row_indices
@@ -865,7 +910,7 @@ def _get_cell_value_by_numerical_index(data, column_index, row_index):
865910
column = DataGrid._column_index_to_name(data, column_index)
866911
if column is None:
867912
return None
868-
return data["data"][row_index][column]
913+
return data["data"].loc[row_index, column]
869914

870915
def _set_renderer_defaults(self):
871916
# Set sensible default values for renderers that are not completely

jest.config.js

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,11 +3,12 @@ module.exports = {
33
'^.+\\.tsx?$': 'ts-jest',
44
'^.+\\.js$': 'babel-jest',
55
},
6-
transformIgnorePatterns: ['node_modules/?!(@jupyter-widgets)'],
6+
transformIgnorePatterns: ['node_modules/?!(@jupyter-widgets)', 'node_modules/bqplot'],
77
testPathIgnorePatterns: ['ui-tests-ipw7/', 'ui-tests-ipw8/'],
88
setupFiles: ['./tests/js/setupFile.js'],
99
testEnvironment: 'jsdom',
1010
moduleNameMapper: {
1111
'\\.(css|less)$': '<rootDir>/__mocks__/styleMock.js',
12+
"raw-loader!.*": "jest-raw-loader",
1213
},
1314
};

0 commit comments

Comments
 (0)