8 changes: 5 additions & 3 deletions paddle/gserver/layers/ConvShiftLayer.cpp
@@ -71,11 +71,11 @@ void ConvShiftLayer::forward(PassType passType) {

MatrixPtr inV0 = getInputValue(0);
MatrixPtr inV1 = getInputValue(1);
const ICpuGpuVectorPtr& seqStartPosPtr = getInput(0).sequenceStartPositions;

size_t batchSize = inV0->getHeight();
size_t dataDim = inV0->getWidth();

CHECK_EQ(batchSize, inV1->getHeight());
CHECK_EQ(dataDim, getSize());

{
@@ -86,7 +86,7 @@ void ConvShiftLayer::forward(PassType passType) {
MatrixPtr outV = getOutputValue();

REGISTER_TIMER_INFO("FwConvShiftTimer", getName().c_str());
outV->circularConv(*inV0, *inV1);
outV->circularConv(*inV0, *inV1, seqStartPosPtr, useGpu_);
}

void ConvShiftLayer::backward(const UpdateCallback& callback) {
@@ -95,11 +95,13 @@ void ConvShiftLayer::backward(const UpdateCallback& callback) {
MatrixPtr outG = getOutputGrad();
MatrixPtr inG0 = getInputGrad(0);
MatrixPtr inG1 = getInputGrad(1);
const ICpuGpuVectorPtr& seqStartPosPtr = getInput(0).sequenceStartPositions;

REGISTER_TIMER_INFO("BwConvShiftTimer", getName().c_str());

if (inG0 && inG1) {
outG->circularConvDerivative(*outG, *inV0, *inV1, *inG0, *inG1);
outG->circularConvDerivative(
*outG, *inV0, *inV1, *inG0, *inG1, seqStartPosPtr, useGpu_);
} else {
CHECK(!inG0 || !inG1) << "Not supported";
}
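The forward and backward passes now hand the first input's sequenceStartPositions down to the matrix kernels, so for sequence input the circular convolution wraps within each sequence rather than within a single fixed-width row. A minimal sketch of how such a start-position vector partitions a batch, using made-up offsets rather than Paddle's ICpuGpuVector:

```cpp
// Sketch only: how a start-position vector delimits sequences in a batch.
// The offsets below are hypothetical; in the layer they come from
// getInput(0).sequenceStartPositions.
#include <cstdio>
#include <vector>

int main() {
  std::vector<int> startPos = {0, 3, 5, 9};  // numSeqs + 1 entries
  size_t numSeqs = startPos.size() - 1;      // mirrors getSize() - 1 above
  for (size_t s = 0; s < numSeqs; ++s) {
    std::printf("sequence %zu covers rows [%d, %d)\n", s, startPos[s], startPos[s + 1]);
  }
  return 0;
}
```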
26 changes: 25 additions & 1 deletion paddle/gserver/tests/LayerGradUtil.cpp
@@ -387,17 +387,41 @@ void initDataLayer(TestConfig testConf,
data.value->sigmoid(*data.value);
data.grad->zeroMem();
break;
case INPUT_SEQUENCE_MNUM_DATA: {
// first compute the total height (sum of all sequence lengths)
sequenceStartPositions =
ICpuGpuVector::create(batchSize + 1, /*useGpu=*/false);
int seqLen = 0;
int* buf = sequenceStartPositions->getMutableData(false);
int64_t pos = 0;
for (size_t j = 0; j < batchSize; ++j) {
seqLen = uniformRandom(testConf.inputDefs[i].maxLen) + 1;
buf[j] = pos;
pos += seqLen;
}
buf[batchSize] = pos;
fillData(trans, layer->getSize(), pos);
data.value->randomizeUniform();
data.value->add(-0.5);
if (testLayerName != "prelu") {
data.value->sigmoid(*data.value);
}
Contributor
Lines 406-408 can be removed; the prelu layer is not used here.

Contributor Author
done

data.grad->zeroMem();
break;
}
default:
LOG(FATAL) << " unknown inputType ";
return;
}
if (testConf.inputDefs[i].inputType == INPUT_SEQUENCE_DATA ||
testConf.inputDefs[i].inputType == INPUT_HASSUB_SEQUENCE_DATA ||
testConf.inputDefs[i].inputType == INPUT_SEQUENCE_LABEL ||
testConf.inputDefs[i].inputType == INPUT_SEQUENCE_MDIM_DATA) {
testConf.inputDefs[i].inputType == INPUT_SEQUENCE_MDIM_DATA ||
testConf.inputDefs[i].inputType == INPUT_SEQUENCE_MNUM_DATA) {
if (!sequenceStartPositions) {
generateSequenceStartPositions(batchSize, sequenceStartPositions);
}

data.sequenceStartPositions = sequenceStartPositions;
}
if (testConf.inputDefs[i].inputType == INPUT_HASSUB_SEQUENCE_DATA) {
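The INPUT_SEQUENCE_MNUM_DATA branch above draws a random length in [1, maxLen] for each of the batchSize sequences and accumulates the offsets; the running total becomes the height of the data matrix. A standalone sketch of that construction, with std::rand standing in for Paddle's uniformRandom:

```cpp
// Sketch of the start-position construction above; std::rand is a stand-in
// for uniformRandom(maxLen), so each length falls in [1, maxLen].
#include <cstdlib>
#include <vector>

std::vector<int> makeStartPositions(size_t batchSize, int maxLen) {
  std::vector<int> buf(batchSize + 1);
  int pos = 0;
  for (size_t j = 0; j < batchSize; ++j) {
    buf[j] = pos;                     // start offset of sequence j
    pos += std::rand() % maxLen + 1;  // random length in [1, maxLen]
  }
  buf[batchSize] = pos;               // total rows; used as the data height
  return buf;
}
```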
18 changes: 17 additions & 1 deletion paddle/gserver/tests/LayerGradUtil.h
@@ -31,7 +31,8 @@ enum InputType {
INPUT_SEQUENCE_LABEL,
INPUT_SPARSE_NON_VALUE_DATA,
INPUT_SPARSE_FLOAT_VALUE_DATA,
INPUT_DENSE_DIM_DATA, // using sequence length to init dense data
INPUT_DENSE_DIM_DATA, // using sequence length to init dense data
INPUT_SEQUENCE_MNUM_DATA,  // treat batchSize as the number of sequences
};

struct ParaSparse {
@@ -62,6 +63,7 @@ struct InputDef {
string name;
size_t dim;
size_t paraSize;
size_t maxLen; // maximum length of sequence data
ParaSparse sparse;
bool isStatic;
std::vector<int> labelInitValue;
@@ -76,6 +78,20 @@ struct InputDef {
isStatic = false;
}

InputDef(InputType type,
string nameIn,
size_t dimIn,
size_t sizeIn,
size_t maxSeqLen) {
inputType = type;
name = nameIn;
dim = dimIn;
paraSize = sizeIn;
maxLen = maxSeqLen;
sparse = {""};
isStatic = false;
}

InputDef(InputType type,
string nameIn,
size_t dimIn,
13 changes: 11 additions & 2 deletions paddle/gserver/tests/test_LayerGrad.cpp
@@ -902,12 +902,16 @@ TEST(Layer, SequenceReshapeLayer) {
}
}

TEST(Layer, ConvShiftLayer) {
void testConvShiftLayer(string trans_type, size_t maxLen = 0) {
TestConfig config;
config.layerConfig.set_type("conv_shift");
config.layerConfig.set_size(10);

config.inputDefs.push_back({INPUT_DATA, "layer_0", 10, 0});
if (trans_type == "non-seq")
config.inputDefs.push_back({INPUT_DATA, "layer_0", 10, 0});
else
config.inputDefs.push_back(
{INPUT_SEQUENCE_MNUM_DATA, "layer_0", 10, 0, maxLen});
config.inputDefs.push_back({INPUT_DATA, "layer_1", 3, 0});
config.layerConfig.add_inputs();
config.layerConfig.add_inputs();
@@ -916,6 +920,11 @@
testLayerGrad(config, "conv_shift", 100, false, false);
}

TEST(Layer, ConvShiftLayer) {
testConvShiftLayer("non-seq");
testConvShiftLayer("seq", 5);
}

TEST(Layer, PowerLayer) {
TestConfig config;
config.layerConfig.set_type("power");
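One detail worth noting: the braced initializer {INPUT_SEQUENCE_MNUM_DATA, "layer_0", 10, 0, maxLen} has five elements, so it compiles only because LayerGradUtil.h now provides the five-argument InputDef constructor. A simplified illustration with stand-in types (not the real InputDef):

```cpp
// Simplified stand-in for InputDef, showing how list arity picks the constructor.
#include <string>

struct InputDefSketch {
  InputDefSketch(int type, std::string name, size_t dim, size_t paraSize) {}
  InputDefSketch(int type, std::string name, size_t dim, size_t paraSize,
                 size_t maxSeqLen) {}
};

int main() {
  InputDefSketch dense{0, "layer_0", 10, 0};              // 4 elements -> old constructor
  InputDefSketch seq{1, "layer_0", 10, 0, /*maxLen=*/5};  // 5 elements -> new constructor
  (void)dense;
  (void)seq;
  return 0;
}
```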
95 changes: 63 additions & 32 deletions paddle/math/Matrix.cpp
@@ -3877,71 +3877,102 @@ real CpuMatrix::getMax() {
return res;
}

void CpuMatrix::circularConv(Matrix& in0, Matrix& in1) {
size_t height = this->getHeight();
void CpuMatrix::circularConv(Matrix& in0,
Matrix& in1,
const ICpuGpuVectorPtr& seqStartPosPtr,
bool useGpu) {
size_t height0 = this->getHeight();
size_t width0 = this->getWidth();
size_t width1 = in1.getWidth();
size_t numSeqs = height0;
// for sequence input, the height of in1 equals the number of sequences
if (nullptr != seqStartPosPtr) {
numSeqs = seqStartPosPtr->getSize() - 1;
}

CHECK_EQ(height, in0.getHeight());
CHECK_EQ(height0, in0.getHeight());
CHECK_EQ(width0, in0.getWidth());
CHECK_EQ(height, in1.getHeight());
CHECK_EQ(numSeqs, in1.getHeight());

CHECK_EQ(width1 % 2, 1U);

real* outV = this->getData();
real* inV0 = in0.getData();
real* inV1 = in1.getData();

const int* startPosIntPtr = nullptr;
if (nullptr != seqStartPosPtr) {
startPosIntPtr = seqStartPosPtr->getData(useGpu);
}

int leftCtxLen = (width1 - 1) / 2;
for (size_t x = 0; x < height;
++x, outV += width0, inV0 += width0, inV1 += width1) {
for (size_t i = 0; i < width0; ++i) { // each dimension of output
for (size_t j = 0; j < width1; ++j) {
// iterate over all dimentions of inV1
int index = i + j - leftCtxLen;
index = (index + width0) % width0;
outV[i] += inV0[index] * inV1[j];
// row-major order: treat each sequence's rows as one long row
for (size_t x = 0; x < numSeqs; ++x) {
size_t curSeqWidth = width0;
if (nullptr != startPosIntPtr)
curSeqWidth *= startPosIntPtr[x + 1] - startPosIntPtr[x];
// convolve one complete sequence
for (size_t i = 0; i < curSeqWidth; ++i) {
for (size_t j = 0; j < width1;
++j) { // iterate over convolution template
int index = (i + j - leftCtxLen + curSeqWidth) % curSeqWidth;
*(outV + i) += *(inV0 + index) * inV1[j];
}
}
outV += curSeqWidth;
inV0 += curSeqWidth;
inV1 += width1;
}
}

void CpuMatrix::circularConvDerivative(
Matrix& outG, Matrix& in0, Matrix& in1, Matrix& inG0, Matrix& inG1) {
size_t height = in0.getHeight();
void CpuMatrix::circularConvDerivative(Matrix& outG,
Matrix& in0,
Matrix& in1,
Matrix& inG0,
Matrix& inG1,
const ICpuGpuVectorPtr& seqStartPosPtr,
bool useGpu) {
size_t height0 = in0.getHeight();
size_t width0 = in0.getWidth();
size_t width1 = in1.getWidth();
size_t numSeqs = height0;
if (nullptr != seqStartPosPtr) numSeqs = seqStartPosPtr->getSize() - 1;

CHECK_EQ(height, in1.getHeight());
CHECK_EQ(height, inG0.getHeight());
CHECK_EQ(numSeqs, in1.getHeight());
CHECK_EQ(height0, inG0.getHeight());
CHECK_EQ(width0, inG0.getWidth());
CHECK_EQ(height, inG1.getHeight());
CHECK_EQ(numSeqs, inG1.getHeight());
CHECK_EQ(width1, inG1.getWidth());
CHECK_EQ(height, outG.getHeight());
CHECK_EQ(height0, outG.getHeight());
CHECK_EQ(width0, outG.getWidth());

real* outGV = outG.getData();
real* inV0 = in0.getData();
real* inV1 = in1.getData();
real* inGV0 = inG0.getData();
real* inGV1 = inG1.getData();
const int* startPosIntPtr = nullptr;
if (nullptr != seqStartPosPtr) {
startPosIntPtr = seqStartPosPtr->getData(useGpu);
}

int leftCtxLen = (width1 - 1) / 2;
for (size_t x = 0; x < height; ++x,
outGV += width0,
inV0 += width0,
inV1 += width1,
inGV0 += width0,
inGV1 += width1) {
for (size_t j = 0; j < width1; ++j) { // iterate over width1
for (size_t i = 0; i < width0; ++i) {
// such over all dimensions of outG
int index = i + j - leftCtxLen;
index = (index + width0) % width0;
inGV0[index] += outGV[i] * inV1[j];
inGV1[j] += outGV[i] * inV0[index];
for (size_t x = 0; x < numSeqs; ++x) {
size_t curSeqWidth = width0;
if (nullptr != startPosIntPtr)
curSeqWidth *= startPosIntPtr[x + 1] - startPosIntPtr[x];
for (size_t j = 0; j < width1; ++j) { // iterate over convolution template
for (size_t i = 0; i < curSeqWidth; ++i) {
int index = (i + j - leftCtxLen + curSeqWidth) % curSeqWidth;
*(inGV0 + index) += *(outGV + i) * inV1[j];
inGV1[j] += *(outGV + i) * *(inV0 + index);
}
}
outGV += curSeqWidth;
inV0 += curSeqWidth;
inV1 += width1;
inGV0 += curSeqWidth;
inGV1 += width1;
}
}

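To make the indexing in the rewritten kernel easier to follow, here is a self-contained sketch of the same per-sequence circular convolution on plain arrays; the names and types are illustrative, not Paddle's API. Each sequence's rows are concatenated into one long row of curSeqWidth elements, and the convolution index wraps modulo that width:

```cpp
// Standalone sketch of the per-sequence circular convolution; not Paddle API.
// out and in0 hold all sequences concatenated row-major (width0 columns per row);
// in1 holds one filter row of width1 taps per sequence.
#include <vector>

void circularConvPerSeq(std::vector<float>& out,
                        const std::vector<float>& in0,
                        const std::vector<float>& in1,
                        const std::vector<int>& startPos,  // numSeqs + 1 offsets
                        int width0,
                        int width1) {
  const int leftCtxLen = (width1 - 1) / 2;
  const size_t numSeqs = startPos.size() - 1;
  float* pOut = out.data();
  const float* pIn0 = in0.data();
  const float* pIn1 = in1.data();
  for (size_t x = 0; x < numSeqs; ++x) {
    // all rows of sequence x, viewed as one long row
    const int curSeqWidth = width0 * (startPos[x + 1] - startPos[x]);
    for (int i = 0; i < curSeqWidth; ++i) {
      for (int j = 0; j < width1; ++j) {
        int index = (i + j - leftCtxLen + curSeqWidth) % curSeqWidth;
        pOut[i] += pIn0[index] * pIn1[j];
      }
    }
    pOut += curSeqWidth;
    pIn0 += curSeqWidth;
    pIn1 += width1;
  }
}
```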
18 changes: 14 additions & 4 deletions paddle/math/Matrix.h
@@ -744,15 +744,20 @@ class Matrix : public BaseMatrix {
* b's index arithmetic is computed modulo M,
* c's index arithmetic is computed modulo N.
*/
virtual void circularConv(Matrix& b, Matrix& c) {
virtual void circularConv(Matrix& b,
Matrix& c,
const ICpuGpuVectorPtr& seqStartPosPtr,
bool useGpu) {
LOG(FATAL) << "Not implemented";
}

virtual void circularConvDerivative(Matrix& output,
Matrix& prevOut1,
Matrix& prevOut2,
Matrix& prevGrad1,
Matrix& prevGrad2) {
Matrix& prevGrad2,
const ICpuGpuVectorPtr& seqStartPosPtr,
bool useGpu) {
LOG(FATAL) << "Not implemented";
}

@@ -1719,12 +1724,17 @@ class CpuMatrix : public Matrix {
IVector& label,
real alpha);

void circularConv(Matrix& b, Matrix& c);
void circularConv(Matrix& b,
Matrix& c,
const ICpuGpuVectorPtr& seqStartPosPtr = nullptr,
bool useGpu = false);
void circularConvDerivative(Matrix& output,
Matrix& prevOut1,
Matrix& prevOut2,
Matrix& prevGrad1,
Matrix& prevGrad2);
Matrix& prevGrad2,
const ICpuGpuVectorPtr& seqStartPosPtr = nullptr,
bool useGpu = false);

void softmax(Matrix& output);
void sequenceSoftmax(Matrix& output, const IVector& index);
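One C++ subtlety behind the nullptr/false defaults: default arguments are bound to the static type of the object expression, not its dynamic type, so the defaults declared on CpuMatrix only help callers that hold a CpuMatrix directly. A call through Matrix& or MatrixPtr must still supply all four arguments, which is why ConvShiftLayer passes seqStartPosPtr and useGpu_ explicitly. A small illustration with generic names (not Paddle types):

```cpp
// Default arguments resolve against the static type of the object expression.
struct Base {
  virtual void conv(int a, int b) = 0;
  virtual ~Base() = default;
};

struct Cpu : Base {
  void conv(int a, int b = 0) override {}  // default visible only through Cpu
};

void demo(Cpu& cpu, Base& base) {
  cpu.conv(1);      // fine: Cpu's default for b applies
  base.conv(1, 0);  // through Base, both arguments are required
  // base.conv(1); // would not compile: Base::conv declares no default for b
}
```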