@@ -1,5 +1,7 @@
 #include "torch_xla/csrc/xla_graph_executor.h"
 
+#include <Python.h>
+
 #include <algorithm>
 #include <atomic>
 #include <cmath>

@@ -635,9 +637,28 @@ std::vector<at::Tensor> XLAGraphExecutor::GetTensorsFused(
       *tensors, async != nullptr ? async->indices : absl::Span<const size_t>(),
       async != nullptr ? async->tensors_data
                        : absl::Span<const torch::lazy::BackendDataPtr>());
+
+  // Execution is async in PJRT, so TransferFromServer may block until execution
+  // completes. Release the GIL so other threads can proceed and unblock any
+  // collective computations.
+  // HACK: This method may be called outside of python (mainly in C++ tests) or
+  // when the GIL is already released, so we must check both cases here. If
+  // possible, prefer to release the GIL in the python bindings before copying
+  // this pattern.
+  PyThreadState* save = nullptr;
+  // TODO(wcromar): Remove this setting when we are more confident
+  static const bool release_gil =
+      xla::sys_util::GetEnvBool("XLA_RELEASE_GIL_DURING_TRANSFER", true);
+  if (release_gil && Py_IsInitialized() && PyGILState_Check()) {
+    save = PyEval_SaveThread();
+  }
   std::vector<xla::Literal> literals =
       xla::ComputationClient::Get()->TransferFromServer(
           UnwrapXlaData(tensors_data));
+  if (save) {
+    PyEval_RestoreThread(save);
+  }
+
   return FetchTensors(tensors, literals,
                       async != nullptr ? &async->indices : nullptr);
 }
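
The comment in the hunk above recommends releasing the GIL at the Python-binding layer rather than copying the save/restore pattern into more C++ call sites. Below is a minimal sketch of that preferred approach, assuming a pybind11 binding (torch_xla's Python bindings are pybind11-based); the module name, binding name, and BlockingTransfer helper are hypothetical stand-ins, not real torch_xla symbols.

// Sketch only: _EXAMPLE, _blocking_transfer, and BlockingTransfer are
// hypothetical names used for illustration.
#include <pybind11/pybind11.h>

namespace py = pybind11;

// Stand-in for a C++ entry point that may block on device execution or a
// device-to-host transfer (e.g. TransferFromServer).
void BlockingTransfer() { /* potentially long-running work */ }

PYBIND11_MODULE(_EXAMPLE, m) {
  m.def("_blocking_transfer", []() {
    // Release the GIL for the duration of the blocking call so other Python
    // threads (e.g. those driving collective computations) keep running; the
    // GIL is reacquired automatically when `release` goes out of scope.
    py::gil_scoped_release release;
    BlockingTransfer();
  });
}

When the GIL is dropped at the binding boundary like this, the executor-level code never runs while holding the GIL and does not need the Py_IsInitialized()/PyGILState_Check() guards. If the behavior added by this patch causes problems, it can be switched off through the environment variable the patch introduces, e.g. XLA_RELEASE_GIL_DURING_TRANSFER=0.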