@@ -67,42 +67,35 @@ class ObjectStoreWriter(@transient val df: DataFrame) extends Serializable {
6767 queue : ObjectRefHolder .Queue ,
6868 ownerName : String ): RecordBatch = {
6969
70- // NOTE: We intentionally do NOT pass an owner argument to Ray.put anymore.
71- //
72- // - When ownerName is empty, route the put via the JVM RayAppMaster actor.
73- // - When ownerName is set to a Python actor name (e.g. RayDPSparkMaster),
74- // invoke that Python actor's put_data(data) method via Ray cross-language
75- // calls so that the Python actor becomes the owner of the created object.
76- val objectRef : ObjectRef [_] =
77- if (ownerName == " " ) {
78- Ray .put(data)
79- } else {
80- // Ray.getActor(String) is a raw Java Optional in Ray's Java API.
81- // If we don't cast it to an explicit reference type here, Scala may infer
82- // Optional[Nothing] and insert an invalid cast at runtime.
83- val opt = Ray .getActor(ownerName).asInstanceOf [Optional [AnyRef ]]
84- if (! opt.isPresent) {
85- throw new RayDPException (s " Actor $ownerName not found when putting dataset block. " )
86- }
87- val handleAny : AnyRef = opt.get()
88- if (! handleAny.isInstanceOf [PyActorHandle ]) {
89- throw new RayDPException (
90- s " Actor $ownerName is not a Python actor; cannot invoke put_data. "
91- )
92- }
93- val pyHandle = handleAny.asInstanceOf [PyActorHandle ]
94- val method = PyActorMethod .of(" put_data" , classOf [AnyRef ])
95- val refOfRef = pyHandle.task(method, data).remote()
96- refOfRef
97- }
70+ // Owner-transfer only implementation:
71+ // - ownerName must always be provided (non-empty) and refer to a Python actor.
72+ // - JVM never creates/handles Ray ObjectRefs for the dataset blocks.
73+ // - JVM returns only a per-batch key encoded in RecordBatch.objectId (bytes),
74+ // and Python will fetch the real ObjectRefs from the owner actor by key.
75+
76+ if (ownerName == null || ownerName.isEmpty) {
77+ throw new RayDPException (" ownerName must be set for Spark->Ray conversion." )
78+ }
79+
80+ val opt = Ray .getActor(ownerName).asInstanceOf [Optional [AnyRef ]]
81+ if (! opt.isPresent) {
82+ throw new RayDPException (s " Actor $ownerName not found when putting dataset block. " )
83+ }
84+ val handleAny : AnyRef = opt.get()
85+ if (! handleAny.isInstanceOf [PyActorHandle ]) {
86+ throw new RayDPException (s " Actor $ownerName is not a Python actor; cannot invoke put_data. " )
87+ }
88+ val pyHandle = handleAny.asInstanceOf [PyActorHandle ]
89+ val batchKey = UUID .randomUUID().toString
90+
91+ // put_data(batchKey, arrowBytes) -> boolean ack
92+ val method = PyActorMethod .of(" put_data" , classOf [java.lang.Boolean ])
93+ val args : Array [AnyRef ] = Array (batchKey, data.asInstanceOf [AnyRef ])
94+ new PyActorTaskCaller (pyHandle, method, args).remote().get()
9895
99- // add the objectRef to the objectRefHolder to avoid reference GC
100- queue.add(objectRef)
101- val objectRefImpl = RayDPUtils .convert(objectRef)
102- val objectId = objectRefImpl.getId
103- val runtime = Ray .internal.asInstanceOf [AbstractRayRuntime ]
104- val addressInfo = runtime.getObjectStore.getOwnershipInfo(objectId)
105- RecordBatch (addressInfo, objectId.getBytes, numRecords)
96+ // ownerAddress/objectId here are not Ray's object metadata; objectId encodes the key.
97+ // Python side will treat objectId as UTF-8 key bytes.
98+ RecordBatch (Array .emptyByteArray, batchKey.getBytes(" UTF-8" ), numRecords)
10699 }
107100
108101 /**
0 commit comments