pandas-dev · jbrockmendel · Aug 6, 2025 · Aug 6, 2025 · Aug 6, 2025 · Aug 4, 2025
diff --git a/asv_bench/benchmarks/algorithms.py b/asv_bench/benchmarks/algorithms.py
@@ -199,8 +199,8 @@ class SortIntegerArray:
     params = [10**3, 10**5]
 
     def setup(self, N):
-        data = np.arange(N, dtype=float)
-        data[40] = np.nan
+        data = np.arange(N, dtype=float).astype(object)
+        data[40] = pd.NA
         self.array = pd.array(data, dtype="Int64")
 
     def time_argsort(self, N):

diff --git a/asv_bench/benchmarks/frame_methods.py b/asv_bench/benchmarks/frame_methods.py
@@ -4,6 +4,7 @@
 import numpy as np
 
 from pandas import (
+    NA,
     DataFrame,
     Index,
     MultiIndex,
@@ -445,6 +446,8 @@ def setup(self, inplace, dtype):
             values[::2] = np.nan
             if dtype == "Int64":
                 values = values.round()
+                values = values.astype(object)
+                values[::2] = NA
             self.df = DataFrame(values, dtype=dtype)
         self.fill_values = self.df.iloc[self.df.first_valid_index()].to_dict()
 

diff --git a/asv_bench/benchmarks/groupby.py b/asv_bench/benchmarks/groupby.py
@@ -689,6 +689,10 @@ def setup(self, dtype, method, with_nans):
             null_vals = vals.astype(float, copy=True)
             null_vals[::2, :] = np.nan
             null_vals[::3, :] = np.nan
+            if dtype in ["Int64", "Float64"]:
+                null_vals = null_vals.astype(object)
+                null_vals[::2, :] = NA
+                null_vals[::3, :] = NA
             df = DataFrame(null_vals, columns=list("abcde"), dtype=dtype)
             df["key"] = keys
             self.df = df

diff --git a/doc/source/user_guide/text.rst b/doc/source/user_guide/text.rst
@@ -75,7 +75,7 @@ or convert from existing pandas data:
 
 .. ipython:: python
 
-   s1 = pd.Series([1, 2, np.nan], dtype="Int64")
+   s1 = pd.Series([1, 2, pd.NA], dtype="Int64")
    s1
    s2 = s1.astype("string")
    s2

diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst
@@ -50,7 +50,7 @@ marker of ``np.nan`` will infer to integer dtype. The display of the ``Series``
 
 .. ipython:: python
 
-   s = pd.Series([1, 2, np.nan], dtype='Int64')
+   s = pd.Series([1, 2, pd.NA], dtype='Int64')
    s
 
 
@@ -166,7 +166,7 @@ See the :ref:`dtypes docs <basics.dtypes>` for more on extension arrays.
 
 .. ipython:: python
 
-   pd.array([1, 2, np.nan], dtype='Int64')
+   pd.array([1, 2, pd.NA], dtype='Int64')
    pd.array(['a', 'b', 'c'], dtype='category')
 
 Passing data for which there isn't dedicated extension type (e.g. float, integer, etc.)

diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
@@ -814,6 +814,7 @@ I/O
 - Bug in :meth:`read_csv` raising ``TypeError`` when ``index_col`` is specified and ``na_values`` is a dict containing the key ``None``. (:issue:`57547`)
 - Bug in :meth:`read_csv` raising ``TypeError`` when ``nrows`` and ``iterator`` are specified without specifying a ``chunksize``. (:issue:`59079`)
 - Bug in :meth:`read_csv` where the order of the ``na_values`` makes an inconsistency when ``na_values`` is a list non-string values. (:issue:`59303`)
+- Bug in :meth:`read_csv` with ``engine="pyarrow"`` and ``dtype="Int64"`` losing precision (:issue:`56136`)
 - Bug in :meth:`read_excel` raising ``ValueError`` when passing array of boolean values when ``dtype="boolean"``. (:issue:`58159`)
 - Bug in :meth:`read_html` where ``rowspan`` in header row causes incorrect conversion to ``DataFrame``. (:issue:`60210`)
 - Bug in :meth:`read_json` ignoring the given ``dtype`` when ``engine="pyarrow"`` (:issue:`59516`)

diff --git a/pandas/_config/__init__.py b/pandas/_config/__init__.py
@@ -33,3 +33,8 @@
 def using_string_dtype() -> bool:
     _mode_options = _global_config["future"]
     return _mode_options["infer_string"]
+
+
+def is_nan_na() -> bool:
+    """
+    Return the current value of the ``mode.nan_is_na`` option.
+
+    The value is read from the ``"nan_is_na"`` entry of the ``"mode"``
+    section of ``_global_config`` on every call; nothing is cached here.
+    """
+    _mode_options = _global_config["mode"]
+    return _mode_options["nan_is_na"]
diff --git a/pandas/_libs/missing.pyi b/pandas/_libs/missing.pyi
@@ -14,3 +14,4 @@ def isneginf_scalar(val: object) -> bool: ...
 def checknull(val: object) -> bool: ...
 def isnaobj(arr: np.ndarray) -> npt.NDArray[np.bool_]: ...
 def is_numeric_na(values: np.ndarray) -> npt.NDArray[np.bool_]: ...
+def is_pdna_or_none(values: np.ndarray) -> npt.NDArray[np.bool_]: ...
diff --git a/pandas/_libs/missing.pyx b/pandas/_libs/missing.pyx
@@ -249,6 +249,24 @@ cdef bint checknull_with_nat_and_na(object obj):
     return checknull_with_nat(obj) or obj is C_NA
 
 
+@cython.wraparound(False)
+@cython.boundscheck(False)
+def is_pdna_or_none(values: ndarray) -> ndarray:
+    """
+    Return a boolean mask flagging entries that are ``None`` or ``C_NA``.
+
+    Parameters
+    ----------
+    values : ndarray
+        Array whose elements are read one at a time as Python objects via
+        ``values[i]`` for ``i`` in ``range(len(values))``.
+        NOTE(review): presumably a 1-D object-dtype array; confirm with callers.
+
+    Returns
+    -------
+    ndarray
+        Boolean array of length ``len(values)``; ``True`` where the element
+        is ``None`` or ``C_NA`` and ``False`` everywhere else.
+
+    Notes
+    -----
+    Elements are matched by identity (``is``) against exactly these two
+    objects; any other element leaves its slot ``False``.
+    """
+    cdef:
+        ndarray[uint8_t] result
+        Py_ssize_t i, N
+        object val
+
+    N = len(values)
+    # Start from all-False; only matching positions are overwritten below.
+    result = np.zeros(N, dtype=np.uint8)
+
+    for i in range(N):
+        val = values[i]
+        if val is None or val is C_NA:
+            result[i] = True
+    # The uint8 buffer is reinterpreted as bool rather than converted.
+    return result.view(bool)
+
+
 @cython.wraparound(False)
 @cython.boundscheck(False)
 def is_numeric_na(values: ndarray) -> ndarray:

diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx
@@ -8,6 +8,8 @@ from csv import (
 )
 import warnings
 
+from pandas._config import is_nan_na
+
 from pandas.util._exceptions import find_stack_level
 
 from pandas import StringDtype
@@ -43,7 +45,6 @@ from libc.string cimport (
     strncpy,
 )
 
-
 import numpy as np
 
 cimport numpy as cnp
@@ -1461,7 +1462,7 @@ def _maybe_upcast(
         if isinstance(arr, IntegerArray) and arr.isna().all():
             # use null instead of int64 in pyarrow
             arr = arr.to_numpy(na_value=None)
-        arr = ArrowExtensionArray(pa.array(arr, from_pandas=True))
+        arr = ArrowExtensionArray(pa.array(arr, from_pandas=is_nan_na()))
 
     return arr
 

diff --git a/pandas/conftest.py b/pandas/conftest.py
@@ -2116,3 +2116,10 @@ def temp_file(tmp_path):
 def monkeysession():
     with pytest.MonkeyPatch.context() as mp:
         yield mp
+
+
+@pytest.fixture(params=[True, False])
+def using_nan_is_na(request):
+    """
+    Fixture parametrized over both values of the ``mode.nan_is_na`` option.
+
+    The option is set to the current parameter through ``pd.option_context``
+    around the ``yield``, and that parameter value is yielded to the test.
+    """
+    opt = request.param
+    with pd.option_context("mode.nan_is_na", opt):
+        yield opt
diff --git a/pandas/core/arrays/_utils.py b/pandas/core/arrays/_utils.py
@@ -7,7 +7,10 @@
 
 import numpy as np
 
+from pandas._config import is_nan_na
+
 from pandas._libs import lib
+from pandas._libs.missing import NA
 from pandas.errors import LossySetitemError
 
 from pandas.core.dtypes.cast import np_can_hold_element
@@ -21,7 +24,10 @@
 
 
 def to_numpy_dtype_inference(
-    arr: ArrayLike, dtype: npt.DTypeLike | None, na_value, hasna: bool
+    arr: ArrayLike,
+    dtype: npt.DTypeLike | None,
+    na_value,
+    hasna: bool,
 ) -> tuple[npt.DTypeLike, Any]:
     if dtype is None and is_numeric_dtype(arr.dtype):
         dtype_given = False
@@ -34,7 +40,11 @@ def to_numpy_dtype_inference(
                 else:
                     dtype = arr.dtype.numpy_dtype  # type: ignore[union-attr]
                 if na_value is lib.no_default:
-                    na_value = np.nan
+                    if not is_nan_na():
+                        na_value = NA
+                        dtype = np.dtype(object)
+                    else:
+                        na_value = np.nan
         else:
             dtype = arr.dtype.numpy_dtype  # type: ignore[union-attr]
     elif dtype is not None: