@@ -364,6 +364,62 @@ abstract class RDD[T: ClassManifest](
364
364
preservesPartitioning : Boolean = false ): RDD [U ] =
365
365
new MapPartitionsWithIndexRDD (this , sc.clean(f), preservesPartitioning)
366
366
367
/**
 * Transforms every element of this RDD with `f`, where `f` receives a second argument
 * of type A alongside the element. That extra argument is obtained by calling
 * `constructA` with the index of the partition being processed, and the same value is
 * passed to `f` for every element of that partition.
 */
def mapWith[A: ClassManifest, U: ClassManifest]
    (constructA: Int => A, preservesPartitioning: Boolean = false)
    (f: (T, A) => U): RDD[U] = {
  val perPartition: (Int, Iterator[T]) => Iterator[U] = (partitionIndex, elements) => {
    // Build the extra argument once, then reuse it for each element of the partition.
    val extra = constructA(partitionIndex)
    elements.map(elem => f(elem, extra))
  }
  new MapPartitionsWithIndexRDD(this, sc.clean(perPartition), preservesPartitioning)
}
380
+
381
/**
 * Flat-maps `f` over this RDD, where `f` receives a second argument of type A alongside
 * the element and returns a sequence of results. That extra argument is obtained by
 * calling `constructA` with the index of the partition being processed, and the same
 * value is passed to `f` for every element of that partition.
 */
def flatMapWith[A: ClassManifest, U: ClassManifest]
    (constructA: Int => A, preservesPartitioning: Boolean = false)
    (f: (T, A) => Seq[U]): RDD[U] = {
  val perPartition: (Int, Iterator[T]) => Iterator[U] = (partitionIndex, elements) => {
    // Build the extra argument once, then reuse it for each element of the partition.
    val extra = constructA(partitionIndex)
    elements.flatMap(elem => f(elem, extra))
  }
  new MapPartitionsWithIndexRDD(this, sc.clean(perPartition), preservesPartitioning)
}
394
+
395
/**
 * Runs `f` on each element of this RDD, where `f` receives a second argument of type A
 * alongside the element. That extra argument is obtained by calling `constructA` with
 * the index of the partition being processed, and the same value is passed to `f` for
 * every element of that partition.
 */
def foreachWith[A: ClassManifest](constructA: Int => A)(f: (T, A) => Unit): Unit = {
  val visitPartition: (Int, Iterator[T]) => Iterator[T] = (partitionIndex, elements) => {
    val extra = constructA(partitionIndex)
    // Apply f for its side effect and pass the element through unchanged.
    elements.map { elem =>
      f(elem, extra)
      elem
    }
  }
  val visited = new MapPartitionsWithIndexRDD(this, sc.clean(visitPartition), true)
  // Iterator.map is lazy, so a no-op foreach is used to consume every element and
  // thereby make f actually run.
  visited.foreach(_ => {})
}
408
+
409
/**
 * Keeps only the elements of this RDD for which `p` returns true, where `p` receives a
 * second argument of type A alongside the element. That extra argument is obtained by
 * calling `constructA` with the index of the partition being processed, and the same
 * value is passed to `p` for every element of that partition.
 */
def filterWith[A: ClassManifest](constructA: Int => A)(p: (T, A) => Boolean): RDD[T] = {
  val keepMatching: (Int, Iterator[T]) => Iterator[T] = (partitionIndex, elements) => {
    // Build the extra argument once, then reuse it for each element of the partition.
    val extra = constructA(partitionIndex)
    elements.filter(elem => p(elem, extra))
  }
  new MapPartitionsWithIndexRDD(this, sc.clean(keepMatching), true)
}
422
+
367
423
/**
368
424
* Zips this RDD with another one, returning key-value pairs with the first element in each RDD,
369
425
* second element in each RDD, etc. Assumes that the two RDDs have the *same number of
@@ -382,6 +438,14 @@ abstract class RDD[T: ClassManifest](
382
438
sc.runJob(this , (iter : Iterator [T ]) => iter.foreach(cleanF))
383
439
}
384
440
441
/**
 * Applies a function f to each partition of this RDD. f is handed the iterator over
 * one partition's elements and is invoked through a job run on this RDD.
 */
def foreachPartition(f: Iterator[T] => Unit): Unit = {
  val cleanF = sc.clean(f)
  // The job closure must call cleanF rather than f: capturing the raw f would make
  // the sc.clean call above pointless, since the uncleaned closure would be shipped.
  sc.runJob(this, (iter: Iterator[T]) => cleanF(iter))
}
448
+
385
449
/**
386
450
* Return an array that contains all of the elements in this RDD.
387
451
*/
@@ -404,7 +468,7 @@ abstract class RDD[T: ClassManifest](
404
468
405
469
/**
406
470
* Return an RDD with the elements from `this` that are not in `other`.
407
- *
471
+ *
408
472
* Uses `this` partitioner/partition size, because even if `other` is huge, the resulting
409
473
* RDD will be <= us.
410
474
*/
0 commit comments