Skip to content

Typed SDCA binary trainers #2506

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 10 commits into from
Feb 14, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion docs/samples/Microsoft.ML.Samples/Dynamic/Calibrator.cs
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ public static void Calibration()
// Then append the StochasticDualCoordinateAscentBinary binary classifier, setting the "Label" column as the label of the dataset, and
// the "Features" column produced by FeaturizeText as the features column.
var pipeline = mlContext.Transforms.Text.FeaturizeText("SentimentText", "Features")
.Append(mlContext.BinaryClassification.Trainers.StochasticDualCoordinateAscent(
.Append(mlContext.BinaryClassification.Trainers.StochasticDualCoordinateAscentNonCalibrated(
labelColumn: "Sentiment",
featureColumn: "Features",
l2Const: 0.001f,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,9 @@

namespace Microsoft.ML.Samples.Dynamic
{
public class SDCA_BinaryClassificationExample
public class SDCALogisticRegression
{
public static void SDCA_BinaryClassification()
public static void Example()
{
// Downloading the dataset from github.com/dotnet/machinelearning.
// This will create a sentiment.tsv file in the filesystem.
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
using System;
using System.Linq;
using Microsoft.ML.Data;

namespace Microsoft.ML.Samples.Dynamic
{
public class SDCASupportVectorMachine
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

SDCASupportVectorMachine [](start = 17, length = 24)

This is a static class right? (Only if you happen to post another commit after this.)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hopefully #2548 can fix it.

{
/// <summary>
/// Trains a non-calibrated SDCA binary classifier with hinge loss on a small generated
/// data set, then prints the label and raw score of the first training example.
/// </summary>
public static void Example()
{
    // Generate IEnumerable<BinaryLabelFloatFeatureVectorSample> as training examples.
    var rawData = SamplesUtils.DatasetUtils.GenerateBinaryLabelFloatFeatureVectorSamples(100);

    // Fetch the first example once so the label and the features printed below come from the same row.
    var firstExample = rawData.First();

    // Information in first example.
    // Label: true
    Console.WriteLine("First example's label is {0}", firstExample.Label);
    // Features is a 10-element float[]:
    //   [0]    1.0173254   float
    //   [1]    0.9680227   float
    //   [2]    0.7581612   float
    //   [3]    0.406033158 float
    //   [4]    0.7588848   float
    //   [5]    1.10602713  float
    //   [6]    0.6421779   float
    //   [7]    1.17754972  float
    //   [8]    0.473704457 float
    //   [9]    0.4919063   float
    // Join the elements explicitly: formatting a float[] directly would only print its type name
    // ("System.Single[]") rather than the values listed above.
    Console.WriteLine("First example's feature vector is {0}", string.Join(", ", firstExample.Features));

    // Create a new context for ML.NET operations. It can be used for exception tracking and logging,
    // as a catalog of available operations and as the source of randomness.
    var mlContext = new MLContext();

    // Step 1: Read the data as an IDataView.
    var data = mlContext.Data.ReadFromEnumerable(rawData);

    // ML.NET doesn't cache data sets by default. Caching is always recommended when using the
    // StochasticDualCoordinateAscent algorithm because it may incur multiple data passes.
    data = mlContext.Data.Cache(data);

    // Step 2: Create a binary classifier. Because the loss is HingeLoss, this trainer is asked for a
    // support-vector-machine style model whose output is a raw score rather than a probability.
    // We set the "Label" column as the label of the dataset, and the "Features" column as the features column.
    var pipeline = mlContext.BinaryClassification.Trainers.StochasticDualCoordinateAscentNonCalibrated(
        labelColumn: "Label", featureColumn: "Features", loss: new HingeLoss(), l2Const: 0.001f);

    // Step 3: Train the pipeline created.
    var model = pipeline.Fit(data);

    // Step 4: Make prediction and evaluate its quality (on training set).
    var prediction = model.Transform(data);

    // NOTE(review): the second argument is presumably the "reuse row object" flag; confirm against
    // the CreateEnumerable signature before switching it to a named argument.
    var rawPrediction = mlContext.CreateEnumerable<SamplesUtils.DatasetUtils.NonCalibratedBinaryClassifierOutput>(prediction, false);

    // Step 5: Inspect the prediction of the first example.
    // Note that a positive/negative label may be associated with a positive/negative score.
    var first = rawPrediction.First();
    Console.WriteLine("The first example actual label is {0}. The trained model assigns it a score {1}.",
        first.Label /*true*/, first.Score /*around 3*/);
}
}
}
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
using System;
using Microsoft.ML.Data;
using Microsoft.ML.StaticPipe;

namespace Microsoft.ML.Samples.Static
Expand Down
4 changes: 2 additions & 2 deletions src/Microsoft.ML.HalLearners/SymSgdClassificationTrainer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@

namespace Microsoft.ML.Trainers.HalLearners
{
using TPredictor = CalibratedModelParametersBase<LinearBinaryModelParameters,PlattCalibrator>;
using TPredictor = CalibratedModelParametersBase<LinearBinaryModelParameters, PlattCalibrator>;

/// <include file='doc.xml' path='doc/members/member[@name="SymSGD"]/*' />
public sealed class SymSgdClassificationTrainer : TrainerEstimatorBase<BinaryPredictionTransformer<TPredictor>, TPredictor>
Expand Down Expand Up @@ -207,7 +207,7 @@ private TPredictor CreatePredictor(VBuffer<float> weights, float bias)
VBufferUtils.CreateMaybeSparseCopy(in weights, ref maybeSparseWeights,
Conversions.Instance.GetIsDefaultPredicate<float>(NumberType.R4));
var predictor = new LinearBinaryModelParameters(Host, in maybeSparseWeights, bias);
return new ParameterMixingCalibratedModelParameters<LinearBinaryModelParameters,PlattCalibrator>(Host, predictor, new PlattCalibrator(Host, -1, 0));
return new ParameterMixingCalibratedModelParameters<LinearBinaryModelParameters, PlattCalibrator>(Host, predictor, new PlattCalibrator(Host, -1, 0));
}

protected override BinaryPredictionTransformer<TPredictor> MakeTransformer(TPredictor model, Schema trainSchema)
Expand Down
24 changes: 24 additions & 0 deletions src/Microsoft.ML.SamplesUtils/SamplesDatasetUtils.cs
Original file line number Diff line number Diff line change
Expand Up @@ -371,6 +371,9 @@ public static IEnumerable<SampleVectorOfNumbersData> GetVectorOfNumbersData()

private const int _simpleBinaryClassSampleFeatureLength = 10;

/// <summary>
/// Example with one binary label and 10 feature values.
/// </summary>
public class BinaryLabelFloatFeatureVectorSample
{
public bool Label;
Expand All @@ -379,6 +382,27 @@ public class BinaryLabelFloatFeatureVectorSample
public float[] Features;
}

/// <summary>
/// Class used to capture the prediction for a <see cref="BinaryLabelFloatFeatureVectorSample"/> when
/// calling <see cref="CursoringUtils.CreateEnumerable"/> via <see cref="MLContext"/>.
/// </summary>
public class CalibratedBinaryClassifierOutput
{
// Label value carried alongside the prediction.
public bool Label;
// Score value of the prediction.
public float Score;
// NOTE(review): presumably the calibrated probability derived from Score; confirm against the trainer output schema.
public float Probability;
}

/// <summary>
/// Class used to capture the prediction for a <see cref="BinaryLabelFloatFeatureVectorSample"/> when
/// calling <see cref="CursoringUtils.CreateEnumerable"/> via <see cref="MLContext"/>.
/// Unlike <see cref="CalibratedBinaryClassifierOutput"/>, it has no probability field.
/// </summary>
public class NonCalibratedBinaryClassifierOutput
{
// Label value carried alongside the prediction.
public bool Label;
// Score value of the prediction.
public float Score;
}

public static IEnumerable<BinaryLabelFloatFeatureVectorSample> GenerateBinaryLabelFloatFeatureVectorSamples(int exampleCount)
{
var rnd = new Random(0);
Expand Down
Loading