Thanks for the response; I highly appreciate your cooperation. I modified the code as follows:
import os
os.environ["MODIN_ENGINE"] = "dask"
import modin.pandas as pd
import swifter
temp_df = pd.DataFrame({"A": [0, 1, 2, 3, 1], "B": [3, 4, 5, 6, 7]})
temp_df[["A", "B"]] = temp_df[["A", "B"]].swifter.apply(np.uint8)
temp_df
But now the error message has changed:
ValueError Traceback (most recent call last)
~/miniconda3/lib/python3.8/site-packages/swifter/parallel_accessor.py in apply(self, func, axis, raw, result_type, args, **kwds)
47 tmp_df = func(sample, *args, **kwds)
---> 48 sample_df = sample.apply(func, axis=axis, raw=raw, result_type=result_type, args=args, **kwds)
49 self._validate_apply(
~/miniconda3/lib/python3.8/site-packages/modin/pandas/dataframe.py in apply(self, func, axis, raw, result_type, args, **kwds)
378 if axis == 0 and result.name == self.index[0] or result.name == 0:
--> 379 result.name = None
380 elif axis == 1 and result.name == self.columns[0] or result.name == 0:
~/miniconda3/lib/python3.8/site-packages/modin/pandas/series.py in _set_name(self, name)
112 name = "__reduced__"
--> 113 self._query_compiler.columns = [name]
114
~/miniconda3/lib/python3.8/site-packages/modin/backends/pandas/query_compiler.py in set_axis(self, cols)
51 def set_axis(self, cols):
---> 52 self._modin_frame.columns = cols
53
~/miniconda3/lib/python3.8/site-packages/modin/engines/base/frame/data.py in _set_columns(self, new_columns)
207 else:
--> 208 new_columns = self._validate_set_axis(new_columns, self._columns_cache)
209 self._columns_cache = new_columns
~/miniconda3/lib/python3.8/site-packages/modin/engines/base/frame/data.py in _validate_set_axis(self, new_labels, old_labels)
163 if old_len != new_len:
--> 164 raise ValueError(
165 "Length mismatch: Expected axis has %d elements, "
ValueError: Length mismatch: Expected axis has 5 elements, new values have 1 elements
During handling of the above exception, another exception occurred:
ValueError Traceback (most recent call last)
<ipython-input-3-8087e8bf8fab> in <module>
2
3 temp_df = pd.DataFrame({"A": [0, 1, 2, 3, 1], "B": [3, 4, 5, 6, 7]})
----> 4 temp_df[["A", "B"]] = temp_df[["A", "B"]].swifter.apply(np.uint8)
5 temp_df
~/miniconda3/lib/python3.8/site-packages/swifter/parallel_accessor.py in apply(self, func, axis, raw, result_type, args, **kwds)
53 return func(self._obj, *args, **kwds)
54 except ERRORS_TO_HANDLE: # if can't vectorize, return regular apply
---> 55 return self._obj.apply(func, axis=axis, raw=raw, result_type=result_type, args=args, **kwds)
56
57
~/miniconda3/lib/python3.8/site-packages/modin/pandas/dataframe.py in apply(self, func, axis, raw, result_type, args, **kwds)
377 if isinstance(result, Series):
378 if axis == 0 and result.name == self.index[0] or result.name == 0:
--> 379 result.name = None
380 elif axis == 1 and result.name == self.columns[0] or result.name == 0:
381 result.name = None
~/miniconda3/lib/python3.8/site-packages/modin/pandas/series.py in _set_name(self, name)
111 if name is None:
112 name = "__reduced__"
--> 113 self._query_compiler.columns = [name]
114
115 name = property(_get_name, _set_name)
~/miniconda3/lib/python3.8/site-packages/modin/backends/pandas/query_compiler.py in set_axis(self, cols)
50
51 def set_axis(self, cols):
---> 52 self._modin_frame.columns = cols
53
54 return set_axis
~/miniconda3/lib/python3.8/site-packages/modin/engines/base/frame/data.py in _set_columns(self, new_columns)
206 self._columns_cache = ensure_index(new_columns)
207 else:
--> 208 new_columns = self._validate_set_axis(new_columns, self._columns_cache)
209 self._columns_cache = new_columns
210 if self._dtypes is not None:
~/miniconda3/lib/python3.8/site-packages/modin/engines/base/frame/data.py in _validate_set_axis(self, new_labels, old_labels)
162 new_len = len(new_labels)
163 if old_len != new_len:
--> 164 raise ValueError(
165 "Length mismatch: Expected axis has %d elements, "
166 "new values have %d elements" % (old_len, new_len)
ValueError: Length mismatch: Expected axis has 5 elements, new values have 1 elements
I am unable to understand why this code works fine with regular pandas when Modin is just a wrapper for scalability, as mentioned on the docs page. Another thing I am curious about: do we necessarily need to specify the computing engine, as I did in the first two lines of the code?
I would be really grateful if you could help me understand the above issues. Thanks in advance.