Skip to content

Inserting new Series objects fails with "Internal Error" #2322

Closed
@anmyachev

Description

@anmyachev

System information

  • OS Platform and Distribution (e.g., Linux Ubuntu 16.04): Windows 10
  • Modin version (modin.__version__): 0.8.1.1+36.ga7d3093
  • Python version: Python 3.7.8
  • Code we can use to reproduce:
import modin.pandas as mpd
# import pandas as mpd
import numpy as np
import datetime
from pytz import timezone
import os

# Show which version is in use (mpd is modin.pandas in this script).
print(mpd.__version__)

# Sets DEFAULT_NPARTITIONS on the modin.pandas module; presumably this makes
# Modin split frames into 4 partitions -- confirm for the Modin version in use.
mpd.DEFAULT_NPARTITIONS = 4


def line_count(fname):
    """Return how many lines the text file at ``fname`` contains.

    The file is opened in text mode with the platform default encoding and
    iterated lazily, so it is never loaded into memory as a whole.
    """
    with open(fname) as handle:
        return sum(1 for _ in handle)


def timestamp_to_local(ts, tzone="Australia/Melbourne"):
    """
    Render a unix timestamp as a local-time string.

    The timestamp is read as UTC, converted to ``tzone`` and formatted as
    ``year-month-day hour:minute:second:microsecond``.

    Args:
        ts: unix timestamp
        tzone: name of the target time zone

    Return:
        The formatted local time as a string.
    """
    utc = timezone("UTC")
    # utcfromtimestamp() yields a naive datetime; localize() tags it as UTC.
    naive_utc = datetime.datetime.utcfromtimestamp(ts)
    aware_utc = utc.localize(naive_utc)
    target = aware_utc.astimezone(timezone(tzone))
    return target.strftime(format="%Y-%m-%d %H:%M:%S:%f")


# Reproducer body: builds a Modin frame from ACC.csv, adds a "Time" column and
# a "T" column from separately constructed Series objects, then prints it.

# Directory layout of the recording session: <fpath>/<session>/<file>.csv
fpath = "data"
# ID='PJ'
session = "A018B6_200330-043359"
E4_path = os.path.join(fpath, session)
tzone = "Australia/Melbourne"

# NOTE(review): prev_end is not read anywhere in the visible script.
prev_end = 0
# Divisor for the spacing of the generated time values below
# (presumably the sampling rate in Hz -- confirm).
fs0 = 32

tempfp = os.path.join(E4_path, "TEMP.csv")


# NOTE(review): prev_endm is not read anywhere in the visible script.
prev_endm = 0
# Same location as os.path.join(E4_path, "ACC.csv"), spelled out literally.
accfp = "data/A018B6_200330-043359/ACC.csv"
accm = mpd.read_csv(accfp, header=None)
# The value at row 0, column 0 is used as the start of the time axis.
startm = accm.iloc[0][0]
# One evenly spaced value per row of the frame as read (step 1 / fs0).
# The length is taken BEFORE the two leading rows are dropped below, so tm
# has two more entries than the final frame has rows.
tm = np.linspace(startm, startm + (len(accm) - 1) / fs0, len(accm))
# Discard the first two rows and renumber the remaining ones from 0.
accm = accm.iloc[2:, :]
accm.reset_index(drop=True, inplace=True)
accm.columns = ["X", "Y", "Z"]
# Read TEMP.csv (skipping its first two rows), repeat every row 8 times and
# keep column 0 as a plain array.
tempm = mpd.read_csv(tempfp, header=None, skiprows=2)
tempm = tempm.loc[tempm.index.repeat(8)].reset_index(drop=True)[0].values
# Scale each axis by 1/64.
accm["X"] = accm["X"] / 64
accm["Y"] = accm["Y"] / 64
accm["Z"] = accm["Z"] / 64
accm.reset_index(drop=True, inplace=True)
# First insertion of a freshly built Series as a new column.
accm["Time"] = mpd.Series(tm)

# Keep only rows where X, Y and Z are all non-null. The surviving index labels
# are passed to iloc as positions; the index was reset just above, so labels
# and positions are expected to coincide.
accm = accm.iloc[
    accm[(accm.X.notnull()) & (accm.Y.notnull()) & (accm.Z.notnull())].index.values, :
]
# Convert every time value to a formatted local-time string.
accm["Time"] = accm["Time"].apply(timestamp_to_local, tzone=tzone)
accm.reset_index(drop=True, inplace=True)
# Second insertion of a freshly built Series as a new column.
accm["T"] = mpd.Series(tempm)
# Per the traceback in this report, the "Internal Error" is raised from this
# print call, while the frame's repr is being built.
print(accm)

Describe the problem

Initially found in https://discuss.modin.org/t/modin-errors-out-on-pytz-timezone/119.

Source code / logs

Traceback (most recent call last):
  File "reproducer_timezone.py", line 77, in <module>
    print(accm)
  File "...\modin\pandas\base.py", line 2684, in __str__
    return repr(self)
  File "...\modin\pandas\dataframe.py", line 182, in __repr__
    result = repr(self._build_repr_df(num_rows, num_cols))
  File "...\modin\pandas\base.py", line 168, in _build_repr_df
    return self.iloc[indexer]._query_compiler.to_pandas()
  File "...\modin\backends\pandas\query_compiler.py", line 231, in to_pandas
    return self._modin_frame.to_pandas()
  File "...\modin\engines\base\frame\data.py", line 1956, in to_pandas
    f"Internal and external indices on axis {axis} do not match.",
  File "...\modin\error_message.py", line 63, in catch_bugs_and_request_email
    " caused this error.\n{}".format(extra_log)
Exception: Internal Error. Please email bug_reports@modin.org with the traceback and command that caused this error.
Internal and external indices on axis 0 do not match.
TEMP.csv
24.67
24.69
24.69
24.69
24.69
24.65
24.65
24.65
24.65
24.65
24.65
24.65
24.65
24.69
24.69
24.69
24.69
24.65
24.65
24.65
24.65
24.63
24.63
24.63
24.63
ACC.csv
-22,59,14
-26,43,20
-23,52,23
-24,56,25
-25,52,30
-26,55,34
-26,58,28
-29,58,23
-30,37,23
-28,51,26
-28,57,25
-29,47,25
-32,53,36
-29,50,32
-31,43,27
-39,49,32
-41,50,28
-41,39,29
-42,34,33
-45,44,36
-40,39,26
-40,28,27
-42,37,34
-41,37,32
-35,38,22
-40,41,26
-41,49,30
-39,51,30
-36,47,24
-36,45,22
-37,46,25
-38,51,27
-36,44,20
-37,46,27
-35,47,25
-36,45,20
-38,49,25
-37,48,21
-32,48,12
-37,50,30
-36,45,22
-36,49,24
-38,49,17
-37,49,25
-36,45,23
-35,42,20
-37,47,34
-36,49,26
-36,45,22
-37,51,22
-39,55,20
-41,58,19
-38,56,20
-38,54,26
-37,53,24
-35,53,28
-30,51,30
-26,43,27
-21,48,19
-23,55,21
-23,58,23
-19,56,19
-18,58,23
-17,54,19
-16,59,19
-14,61,19
-8,61,17
-5,62,16
-2,55,23
4,56,28
17,72,40
21,45,44
19,24,25
10,40,22
9,50,27
6,63,41
10,49,37
13,41,64
22,39,50
-2,54,50
-9,45,30
7,56,32
9,59,39
18,58,24
26,51,5
30,51,1
47,51,-12
51,52,-18
43,41,-25
35,30,-35
51,31,-35
59,28,-43
49,20,-39
53,27,-29
44,21,-24
44,27,-11
42,30,-1
56,43,1
50,37,-8
49,34,-9
48,42,-2
48,56,16
39,55,19
30,56,20
30,58,25
17,61,20
27,63,25
37,29,14
27,42,6
35,61,24
31,63,31
26,63,21
16,46,21
8,39,20
-6,56,23
-7,52,28
-25,60,41
-28,62,14
-27,45,-27
-19,43,30
-25,89,-1
74,118,-104
-17,88,-39
-4,31,-26
-15,98,-23
-6,107,55
2,83,-7
-5,64,-4
-6,66,6
-1,59,-6
-3,44,-19
-21,59,-21
-25,61,-28
-32,58,-27
-32,58,-24
-27,54,27
-36,86,-25
-30,58,-16
-29,65,-15
-27,66,-16
-20,54,-14
-19,53,-10
-22,67,-12
-19,62,-12
-21,60,-13
-20,59,-14
-26,57,-17
-17,41,-15
-24,53,-22
-22,67,16
-27,62,-19
-27,50,-25
-31,60,-24
-30,61,-15
-27,60,-18
-21,50,-11
-21,54,-9
-25,59,-9
-26,60,-10
-32,79,-11
-28,73,-10
-23,57,-10
-19,44,-8
-20,45,-10
-21,47,-12
-27,72,-18
-26,63,-16
-25,56,-19
-26,46,16
-27,46,-19
-23,44,-14
-26,55,-15
-34,88,-13
-32,88,-14
-22,58,-8
-21,58,-7
-28,65,-4
-26,57,-11
-28,55,-16
-30,57,-16
-28,53,-16
-26,54,-11
-29,64,-14
-25,57,-13
-22,51,-13
-31,71,-16
-30,74,-17
-23,66,-13
-15,47,-15
-15,55,-15
-11,47,-17
-7,35,-20
-13,49,-19
-20,61,-17
-26,79,-18
-25,64,-16
-28,66,-17
-28,66,-17
-18,72,-13
-14,63,-14
-9,54,-12
-12,62,-15
2,75,-62
-7,59,3
-10,60,-12
-18,69,-13
-16,62,-11
-18,51,-18
-21,54,-20
-29,73,-20
-28,72,-20
-21,61,-16
-20,58,-14
-16,53,-10
-15,55,-9
-20,69,-11
-22,73,-10
-18,65,-7
-2,62,-18
-14,63,0
-14,51,-12
-11,41,-15

Metadata

Metadata

Assignees

Labels

No labels
No labels

Type

No type

Projects

No projects

Milestone

No milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions