@@ -6,14 +6,16 @@ import (
6
6
"encoding/json"
7
7
"errors"
8
8
"fmt"
9
- "github.com/mesosphere/dcos-commons/cli/client"
10
- "github.com/mesosphere/dcos-commons/cli/config"
11
- "gopkg.in/alecthomas/kingpin.v3-unstable"
12
9
"log"
13
10
"net/url"
14
11
"os"
15
12
"regexp"
16
13
"strings"
14
+
15
+ "github.com/mattn/go-shellwords"
16
+ "github.com/mesosphere/dcos-commons/cli/client"
17
+ "github.com/mesosphere/dcos-commons/cli/config"
18
+ "gopkg.in/alecthomas/kingpin.v3-unstable"
17
19
)
18
20
19
21
// keyWhitespaceValPattern matches a "key<whitespace>value" line: capture
// group 1 is the text before a run of whitespace and capture group 2 is the
// text after it. A raw string is used so the \s class reads as written.
var keyWhitespaceValPattern = regexp.MustCompile(`(.+)\s+(.+)`)
@@ -146,8 +148,10 @@ Args:
146
148
StringVar (& args .mainClass ) // note: spark-submit can autodetect, but only for file://local.jar
147
149
submit .Flag ("properties-file" , "Path to file containing whitespace-separated Spark property defaults." ).
148
150
PlaceHolder ("PATH" ).ExistingFileVar (& args .propertiesFile )
149
- submit .Flag ("conf" , "Custom Spark configuration properties." ).
150
- PlaceHolder ("PROP=VALUE" ).StringMapVar (& args .properties )
151
+ submit .Flag ("conf" , "Custom Spark configuration properties. " +
152
+ "If submitting properties with multiple values, " +
153
+ "wrap in single quotes e.g. --conf prop='val1 val2'" ).
154
+ PlaceHolder ("prop=value" ).StringMapVar (& args .properties )
151
155
submit .Flag ("kerberos-principal" , "Principal to be used to login to KDC." ).
152
156
PlaceHolder ("user@REALM" ).Default ("" ).StringVar (& args .kerberosPrincipal )
153
157
submit .Flag ("keytab-secret-path" , "Path to Keytab in secret store to be used in the Spark drivers" ).
@@ -280,75 +284,84 @@ func parseApplicationFile(args *sparkArgs) error {
280
284
return nil
281
285
}
282
286
283
- func cleanUpSubmitArgs ( argsStr string , boolVals [] * sparkVal ) ([] string , [] string ) {
284
-
285
- // collapse two or more spaces to one.
286
- argsCompacted := collapseSpacesPattern . ReplaceAllString (argsStr , " " )
287
+ // we use Kingpin to parse CLI commands and options
288
+ // spark-submit by convention uses '--arg val' while kingpin only supports --arg=val
289
+ // transformSubmitArgs turns the former into the latter
290
+ func transformSubmitArgs (argsStr string , boolVals [] * sparkVal ) ([] string , [] string ) {
287
291
// clean up any instances of shell-style escaped newlines: "arg1\\narg2" => "arg1 arg2"
288
- argsCleaned := strings .TrimSpace (backslashNewlinePattern .ReplaceAllLiteralString (argsCompacted , " " ))
289
- // HACK: spark-submit uses '--arg val' by convention, while kingpin only supports '--arg=val'.
290
- // translate the former into the latter for kingpin to parse.
291
- args := strings .Split (argsCleaned , " " )
292
- argsEquals := make ([]string , 0 )
293
- appFlags := make ([]string , 0 )
294
- i := 0
295
- ARGLOOP:
296
- for i < len (args ) {
297
- arg := args [i ]
298
- if ! strings .HasPrefix (arg , "-" ) {
299
- // looks like we've exited the flags entirely, and are now at the jar and/or args.
300
- // any arguments without a dash at the front should've been joined to preceding keys.
301
- // flush the rest and exit.
302
- for i < len (args ) {
303
- arg = args [i ]
304
- // if we have a --flag going to the application we need to take the arg (flag) and the value ONLY
305
- // if it's not of the format --flag=val which scopt allows
306
- if strings .HasPrefix (arg , "-" ) {
307
- appFlags = append (appFlags , arg )
308
- if strings .Contains (arg , "=" ) || (i + 1 ) >= len (args ) {
309
- i += 1
310
- } else {
311
- // if there's a value with this flag, add it
312
- if ! strings .HasPrefix (args [i + 1 ], "-" ) {
313
- appFlags = append (appFlags , args [i + 1 ])
314
- i += 1
315
- }
316
- i += 1
317
- }
318
- } else {
319
- argsEquals = append (argsEquals , arg )
320
- i += 1
321
- }
292
+ argsStr = strings .TrimSpace (backslashNewlinePattern .ReplaceAllLiteralString (argsStr , " " ))
293
+ // collapse two or more spaces to one
294
+ argsStr = collapseSpacesPattern .ReplaceAllString (argsStr , " " )
295
+ // parse argsStr into []string args maintaining shell escaped sequences
296
+ args , err := shellwords .Parse (argsStr )
297
+ if err != nil {
298
+ log .Fatalf ("Could not parse string args correctly. Error: %v" , err )
299
+ }
300
+ sparkArgs , appArgs := make ([]string , 0 ), make ([]string , 0 )
301
+ LOOP:
302
+ for i := 0 ; i < len (args ); {
303
+ current := strings .TrimSpace (args [i ])
304
+ switch {
305
+ // The main assumption with --submit-args is that all spark-submit flags come before the spark jar URL
306
+ // if current is a spark jar/app, we've processed all flags
307
+ case isSparkApp (current ):
308
+ sparkArgs = append (sparkArgs , args [i ])
309
+ appArgs = append (appArgs , args [i + 1 :]... )
310
+ break LOOP
311
+ case strings .HasPrefix (current , "--" ):
312
+ if isBoolFlag (boolVals , current ) {
313
+ sparkArgs = append (sparkArgs , current )
314
+ i ++
315
+ continue LOOP
322
316
}
323
- break
324
- }
325
- // join this arg to the next arg if...:
326
- // 1. we're not at the last arg in the array
327
- // 2. we start with "--"
328
- // 3. we don't already contain "=" (already joined)
329
- // 4. we aren't a boolean value (no val to join)
330
- if i < len (args )- 1 && strings .HasPrefix (arg , "--" ) && ! strings .Contains (arg , "=" ) {
331
- // check for boolean:
332
- for _ , boolVal := range boolVals {
333
- if boolVal .flagName == arg [2 :] {
334
- argsEquals = append (argsEquals , arg )
335
- i += 1
336
- continue ARGLOOP
337
- }
317
+ if strings .Contains (current , "=" ) {
318
+ // already in the form arg=val, no merge required
319
+ sparkArgs = append (sparkArgs , current )
320
+ i ++
321
+ continue LOOP
338
322
}
339
- // merge this --key against the following val to get --key=val
340
- argsEquals = append (argsEquals , arg + "=" + args [i + 1 ])
323
+ // otherwise, merge current with next into form arg=val; eg --driver-memory=512m
324
+ next := args [i + 1 ]
325
+ sparkArgs = append (sparkArgs , current + "=" + next )
341
326
i += 2
342
- } else {
343
- // already joined or at the end, pass through:
344
- argsEquals = append (argsEquals , arg )
345
- i += 1
327
+ default :
328
+ // if not a flag or jar, current is a continuation of the last arg and should not have been split
329
+ // eg extraJavaOptions="-Dparam1 -Dparam2" was parsed as [extraJavaOptions, -Dparam1, -Dparam2]
330
+ combined := sparkArgs [len (sparkArgs )- 1 ] + " " + current
331
+ sparkArgs = append (sparkArgs [:len (sparkArgs )- 1 ], combined )
332
+ i ++
346
333
}
347
334
}
348
- client .PrintVerbose ("Translated spark-submit arguments: '%s'" , argsEquals )
349
- client .PrintVerbose ("Translated application arguments: '%s'" , appFlags )
335
+ if config .Verbose {
336
+ client .PrintVerbose ("Translated spark-submit arguments: '%s'" , strings .Join (sparkArgs , ", " ))
337
+ client .PrintVerbose ("Translated application arguments: '%s'" , strings .Join (appArgs , ", " ))
338
+ }
339
+ return sparkArgs , appArgs
340
+ }
350
341
351
- return argsEquals , appFlags
342
// acceptedSparkAppExtensions lists the file-name suffixes that identify an
// argument as the Spark application file.
var acceptedSparkAppExtensions = []string{
	".jar",
	".py",
	".R",
}

// isSparkApp reports whether str ends with one of the suffixes in
// acceptedSparkAppExtensions. The comparison is case-sensitive.
func isSparkApp(str string) bool {
	matched := false
	for idx := 0; idx < len(acceptedSparkAppExtensions) && !matched; idx++ {
		matched = strings.HasSuffix(str, acceptedSparkAppExtensions[idx])
	}
	return matched
}
356
+
357
+ // check if string is a boolean flag (eg --supervise)
358
+ func isBoolFlag (boolVals []* sparkVal , str string ) bool {
359
+ for _ , boolVal := range boolVals {
360
+ if boolVal .flagName == str [2 :] {
361
+ return true
362
+ }
363
+ }
364
+ return false
352
365
}
353
366
354
367
func getValsFromPropertiesFile (path string ) map [string ]string {
@@ -416,7 +429,7 @@ func buildSubmitJson(cmd *SparkCommand, marathonConfig map[string]interface{}) (
416
429
// then map flags
417
430
submit , args := sparkSubmitArgSetup () // setup
418
431
// convert and get application flags, add them to the args passed to the spark app
419
- submitArgs , appFlags := cleanUpSubmitArgs (cmd .submitArgs , args .boolVals )
432
+ submitArgs , appFlags := transformSubmitArgs (cmd .submitArgs , args .boolVals )
420
433
args .appArgs = append (args .appArgs , appFlags ... )
421
434
_ , err := submit .Parse (submitArgs )
422
435
@@ -509,7 +522,7 @@ func buildSubmitJson(cmd *SparkCommand, marathonConfig map[string]interface{}) (
509
522
} else {
510
523
client .PrintMessage ("Using image '%s' for the driver and the executors (from %s)." ,
511
524
args .properties ["spark.mesos.executor.docker.image" ], imageSource )
512
- client .PrintMessage ("To disable this image on executors, set " +
525
+ client .PrintMessage ("To disable this image on executors, set " +
513
526
"spark.mesos.executor.docker.forcePullImage=false" )
514
527
args .properties ["spark.mesos.executor.docker.forcePullImage" ] = "true"
515
528
}
0 commit comments