perspective-dev · texodus · May 26, 2023 · May 17, 2023 · May 19, 2023 · May 24, 2023
diff --git a/cpp/perspective/src/cpp/sparse_tree.cpp b/cpp/perspective/src/cpp/sparse_tree.cpp
@@ -1127,19 +1127,13 @@ t_stree::update_agg_table(t_uindex nidx, t_agg_update_info& info,
                 new_value.set(reduce_from_gstate<
                     std::function<t_tscalar(std::vector<t_tscalar>&)>>(gstate,
                     expression_master_table, spec.get_dependencies()[0].name(),
-                    pkeys, [](std::vector<t_tscalar>& values) {
+                    pkeys, [&](std::vector<t_tscalar>& values) {
                         if (values.size() == 0) {
                             return t_tscalar();
                         } else if (values.size() == 1) {
                             return values[0];
                         } else {
-                            std::vector<t_tscalar>::iterator middle
-                                = values.begin() + (values.size() / 2);
-
-                            std::nth_element(
-                                values.begin(), middle, values.end());
-
-                            return *middle;
+                            return get_aggregate_median(values);
                         }
                     }));
 
@@ -2020,6 +2014,28 @@ t_stree::get_aggregate(t_index idx, t_index aggnum) const {
     return extract_aggregate(m_aggspecs[aggnum], c, agg_ridx, agg_pridx);
 }
 
+// Median of `values` (reordered in place); an even-sized numeric input averages its two middles.
+t_tscalar
+t_stree::get_aggregate_median(std::vector<t_tscalar>& values) const {
+    if (values.empty()) {
+        return t_tscalar(); // no values: default scalar (also guards the values[0] read below)
+    }
+    // Decide whether to average before reordering, while values[0] is still the caller's first value.
+    const bool average_middles = values.size() % 2 == 0 && values[0].is_numeric();
+    auto upper_middle = values.begin() + (values.size() / 2);
+    std::nth_element(values.begin(), upper_middle, values.end());
+    if (!average_middles) {
+        return *upper_middle; // odd size or non-numeric: the upper middle is the median
+    }
+
+    // nth_element leaves [begin, upper_middle) in unspecified order, so the lower middle
+    // is the maximum of that partition, not whatever sits at a fixed position.
+    auto lower_middle = std::max_element(values.begin(), upper_middle);
+    t_tscalar median_average;
+    median_average.set((*lower_middle + *upper_middle) / static_cast<t_tscalar>(2));
+    return median_average;
+}
+
 void
 t_stree::get_child_indices(t_index idx, std::vector<t_index>& out_data) const {
     t_index num_children = get_num_children(idx);

diff --git a/cpp/perspective/src/include/perspective/sparse_tree.h b/cpp/perspective/src/include/perspective/sparse_tree.h
@@ -258,6 +258,8 @@ class PERSPECTIVE_EXPORT t_stree {
 
     t_tscalar get_aggregate(t_index idx, t_index aggnum) const;
 
+    t_tscalar get_aggregate_median(std::vector<t_tscalar>& values) const; // reorders `values` in place
+
     void get_child_indices(t_index idx, std::vector<t_index>& out_data) const;
 
     void set_alerts_enabled(bool enabled_state);

diff --git a/packages/perspective/test/js/expressions/functionality.spec.js b/packages/perspective/test/js/expressions/functionality.spec.js
@@ -2491,11 +2491,21 @@ const perspective = require("@finos/perspective");
         });
 
         test("Should be able to aggregate a numeric expression column.", async function () {
-            const table = await perspective.table({
+
+            const data = {
                 x: [1, 2, 3, 4],
                 y: [100, 200, 300, 400],
                 z: [1.5, 2.5, 3.5, 4.5],
-            });
+            }
+            const schema = {
+                x:'float',
+                y:'float',
+                z:'float'
+            }
+
+            const table = await perspective.table(schema);
+            table.update(data)
+
             const view = await table.view({
                 group_by: ['"x" + "z"'],
                 aggregates: {
@@ -2507,8 +2517,8 @@ const perspective = require("@finos/perspective");
             const result = await view.to_columns();
             expect(result).toEqual({
                 __ROW_PATH__: [[], [2.5], [4.5], [6.5], [8.5]],
-                '"x" + "z"': [6.5, 2.5, 4.5, 6.5, 8.5],
-                x: [3, 1, 2, 3, 4],
+                '"x" + "z"': [5.5, 2.5, 4.5, 6.5, 8.5],
+                x: [2.5, 1, 2, 3, 4],
                 y: [1000, 100, 200, 300, 400],
                 z: [12, 1.5, 2.5, 3.5, 4.5],
             });
@@ -2540,11 +2550,20 @@ const perspective = require("@finos/perspective");
         });
 
         test("Should be able to aggregate a numeric expression column that aliases a real column.", async function () {
-            const table = await perspective.table({
+            const data = {
                 x: [1, 2, 3, 4],
                 y: [100, 200, 300, 400],
                 z: [1.5, 2.5, 3.5, 4.5],
-            });
+            }
+            const schema = {
+                x:'float',
+                y:'float',
+                z:'float'
+            }
+
+            const table = await perspective.table(schema);
+            table.update(data)
+
             const view = await table.view({
                 group_by: ['"x"'],
                 aggregates: {
@@ -2556,8 +2575,8 @@ const perspective = require("@finos/perspective");
             const result = await view.to_columns();
             expect(result).toEqual({
                 __ROW_PATH__: [[], [1], [2], [3], [4]],
-                '"x"': [3, 1, 2, 3, 4],
-                x: [3, 1, 2, 3, 4],
+                '"x"': [2.5, 1, 2, 3, 4],
+                x: [2.5, 1, 2, 3, 4],
                 y: [1000, 100, 200, 300, 400],
                 z: [12, 1.5, 2.5, 3.5, 4.5],
             });

diff --git a/python/perspective/perspective/tests/core/test_aggregates.py b/python/perspective/perspective/tests/core/test_aggregates.py
@@ -8,7 +8,7 @@
 
 from pytest import raises
 from perspective import PerspectiveError, PerspectiveViewer,\
-                        PerspectiveWidget, Aggregate
+                        PerspectiveWidget, Aggregate, Table
 
 
 class TestAggregates:
@@ -86,3 +86,47 @@ def test_aggregates_viewer_set_all(self):
         for agg in Aggregate:
             viewer.aggregates = {"a": agg}
             assert viewer.aggregates == {"a": agg.value}
+
+    def get_median(self, input_data):
+        """Return 'Price' from the first row of a median view of input_data grouped by 'Item'."""
+        source_table = Table(data=input_data)
+        median_view = source_table.view(
+            group_by=['Item'],
+            aggregates={'Price': 'median'},
+            columns=['Price'])
+        return median_view.to_json()[0]['Price']
+
+    def test_aggregate_median(self):  # checks the 'median' aggregate on odd/even-sized float and string columns
+        numeric_data = [
+            {'Item':'Book','Price':2.0},
+            {'Item':'Book','Price':3.0},
+            {'Item':'Book','Price':5.0},
+            {'Item':'Book','Price':4.0},
+            {'Item':'Book','Price':8.0},
+            {'Item':'Book','Price':9.0},
+            {'Item':'Book','Price':6.0},
+        ]
+
+        non_numeric_data = [
+            {'Item':'Book','Price':'2'},
+            {'Item':'Book','Price':'3'},
+            {'Item':'Book','Price':'5'},
+            {'Item':'Book','Price':'4'},
+            {'Item':'Book','Price':'8'},
+            {'Item':'Book','Price':'9'},
+            {'Item':'Book','Price':'6'},
+        ]
+
+        # Numeric data: an even number of values expects the mean of the two middle values
+        assert self.get_median(numeric_data) == 5.0  #List = [2.0,3.0,5.0,4.0,8.0,9.0,6.0], median = 5.0
+        assert self.get_median(numeric_data[:2]) == 2.5 #List = [2.0,3.0], median = 2.5
+        assert self.get_median(numeric_data[5:]) == 7.5 #List = [9.0,6.0], median = 7.5
+        assert self.get_median(numeric_data[1:]) == 5.5 #List = [3.0,5.0,4.0,8.0,9.0,6.0], median = 5.5
+        assert self.get_median(numeric_data[::2]) == 5.5 #List = [2.0,5.0,8.0,6.0], median = 5.5
+
+        # Non-numeric data: no averaging, so an even number of values expects the upper middle value
+        assert self.get_median(non_numeric_data) == '5'  #List = ['2','3','5','4','8','9','6'], median = '5'
+        assert self.get_median(non_numeric_data[:2]) == '3' #List = ['2','3'], median = '3'
+        assert self.get_median(non_numeric_data[5:]) == '9' #List = ['9','6'], median = '9'
+        assert self.get_median(non_numeric_data[1:]) == '6' #List = ['3','5','4','8','9','6'], median = '6'
+        assert self.get_median(non_numeric_data[::2]) == '6' #List = ['2','5','8','6'], median = '6'