Implementing Logistic Regression for the Third Time (C++): Implementation, Part 2

2. SGD Training

The SGD weight update works in essentially the same way as in binary LR; the difference is that binary LR trains a single weight vector, whereas K-class LR trains K-1 weight vectors, with the K-th class serving as the default (reference) class. The function interface is as follows (the model it trains is spelled out right after the declaration):

// train by SGD on the sample file
bool TrainSGDOnSampleFile (
			const char * sFileName, int iClassNum, int iFeatureNum,		// about the samples
			double dLearningRate = 0.05,								// about the learning
			int iMaxLoop = 1, double dMinImproveRatio = 0.01			// about the stop criteria
			);
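
Spelled out (consistent with the formulas in the code comments of CalcFuncOutByFeaVecForAllClass below), the K-th class acts as the default class with an implicit all-zero weight vector, and the model predicts

	P(y = i \mid x) = \frac{\exp(\theta_i \cdot x)}{1 + \sum_{j=1}^{K-1} \exp(\theta_j \cdot x)} \qquad (1 \le i \le K-1),

	P(y = K \mid x) = \frac{1}{1 + \sum_{j=1}^{K-1} \exp(\theta_j \cdot x)} ,

which is why only \theta_1, ..., \theta_{K-1} need to be trained.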

It calls the following private functions:

// initialize the theta matrix with iClassNum and iFeatureNum
bool InitThetaMatrix (int iClassNum, int iFeatureNum);
// calculate the model function output for iClassIndex by feature vector
double CalcFuncOutByFeaVec (vector<FeaValNode> & FeaValNodeVec, int iClassIndex);
// calculate the model function output for all the classes, and return the class index with max probability
int CalcFuncOutByFeaVecForAllClass (vector<FeaValNode> & FeaValNodeVec, vector<double> & ClassProbVec);
// calculate the gradient and update the theta matrix, it returns the cost
double UpdateThetaMatrix (Sample & theSample, vector<double> & ClassProbVec, double dLearningRate);

These functions, respectively: initialize the weight matrix, compute the (unnormalized) model output for one class from a feature vector under the current parameters, compute the predicted probabilities for all classes, and update the weights.
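
The supporting types FeaValNode, Sample, and the theta matrix come from earlier posts in this series. A minimal sketch of the layout the code below assumes (field names are taken from their usage here; the exact definitions may differ):

// assumed layout, reconstructed from how the fields are used in this post
struct FeaValNode
{
	int iFeatureId;						// index of the feature
	double dValue;						// value of the feature
};

struct Sample
{
	int iClass;							// class label in [0, iClassNum)
	vector<FeaValNode> FeaValNodeVec;	// sparse feature vector
};

// members of class LogisticRegression (assumed):
// int iClassNum, iFeatureNum;
// vector< vector<double> > ThetaMatrix;	// iClassNum x iFeatureNum weights (last row unused: the default class has none)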

The SGD training function is implemented as follows:

// the sample format: classid feature1_value feature2_value...
bool LogisticRegression::TrainSGDOnSampleFile (
			const char * sFileName, int iClassNum, int iFeatureNum,		// about the samples
			double dLearningRate,										// about the learning
			int iMaxLoop, double dMinImproveRatio						// about the stop criteria (defaults live in the declaration)
			)
{
	ifstream in (sFileName);
	if (!in)
	{
		cerr << "Cannot open file " << sFileName << endl;
		return false;
	}

	if (!InitThetaMatrix (iClassNum, iFeatureNum))
		return false;

	double dCost = 0.0;
	double dPreCost = 100.0;
	for (int iLoop = 0; iLoop < iMaxLoop; iLoop++)
	{
		int iSampleNum = 0;
		int iErrNum = 0;
		string sLine;
		while (getline (in, sLine))
		{
			Sample theSample;
			if (ReadSampleFrmLine (sLine, theSample))
			{
				vector<double> ClassProbVec;
				int iPredClassIndex = CalcFuncOutByFeaVecForAllClass (theSample.FeaValNodeVec, ClassProbVec);
				if (iPredClassIndex != theSample.iClass)
					iErrNum++;

				dCost += UpdateThetaMatrix (theSample, ClassProbVec, dLearningRate); 
				iSampleNum++;
			}
		}

		if (iSampleNum == 0)	// no valid sample was read; avoid dividing by zero
			break;
		dCost /= iSampleNum;
		double dTmpRatio = (dPreCost - dCost) / dPreCost;
		double dTmpErrRate = (double)iErrNum / iSampleNum;

		// show info on screen
		cout << "In loop " << iLoop << ": current cost (" << dCost << ") previous cost (" << dPreCost << ") ratio (" << dTmpRatio << ") "<< endl;
		cout << "And Error rate : " << dTmpErrRate << endl;

		if (dTmpRatio < dMinImproveRatio)
			break;
		else
		{
			dPreCost = dCost;
			dCost = 0.0;
			//reset the current reading position of file
			in.clear();
			in.seekg (0, ios::beg);
		}
	}

	return true;
}
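
ReadSampleFrmLine is called above but not shown in this post. A minimal sketch, assuming the dense format named in the comment ("classid feature1_value feature2_value..."), where the i-th value gets feature id i; the signature is inferred from the call site:

// a minimal sketch, assuming the dense "classid value value ..." format;
// the real implementation lives elsewhere in this series (needs <sstream>)
bool LogisticRegression::ReadSampleFrmLine (string & sLine, Sample & theSample)
{
	istringstream iss (sLine);
	if (!(iss >> theSample.iClass))		// first token is the class label
		return false;

	double dValue = 0.0;
	int iFeatureId = 0;
	while (iss >> dValue)				// remaining tokens are feature values
	{
		FeaValNode theNode;
		theNode.iFeatureId = iFeatureId++;
		theNode.dValue = dValue;
		theSample.FeaValNodeVec.push_back (theNode);
	}
	return !theSample.FeaValNodeVec.empty();
}

With that in place, a training call might look like this (file name and hyper-parameters are illustrative):

LogisticRegression theLR;
theLR.TrainSGDOnSampleFile ("train.txt", 3, 1000, 0.05, 100, 0.01);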

The per-class model output is computed as follows:

// it returns the value of f(x) = exp (W*X) for iClassIndex < K-1, and 1.0 for the default class iClassIndex == K-1
double LogisticRegression::CalcFuncOutByFeaVec(vector<FeaValNode> & FeaValNodeVec, int iClassIndex)
{
	if (iClassIndex >= iClassNum || iClassIndex < 0)	// wrong situation
		return 0.0;

	if (iClassIndex == (iClassNum-1) )	// the default class (here, the class with the largest index)
		return 1.0;

	double dX = 0.0;
	vector<FeaValNode>::iterator p = FeaValNodeVec.begin();
	while (p != FeaValNodeVec.end())
	{
		if (p->iFeatureId < (int)ThetaMatrix.at(iClassIndex).size())	// all input is evil
			dX += ThetaMatrix[iClassIndex][p->iFeatureId] * p->dValue;			
		p++;
	}

	double dY = exp (dX);
	return dY;
}

Two things to note: 1) of the K classes, the K-th is the default class; 2) the value returned here is not a sigmoid value but a bare exponential. The final probabilities are computed in the following code:

// the class probability is calculated by :
// f(x) = exp (W*X) / {1.0 + sum_exp (W*X)} for iClassIndex < K-1
// f(x) = 1.0 / {1.0 + sum_exp (W*X)} for the default class iClassIndex == K-1
int LogisticRegression::CalcFuncOutByFeaVecForAllClass (vector<FeaValNode> & FeaValNodeVec, vector<double> & ClassProbVec)
{
	ClassProbVec.clear();
	ClassProbVec.resize (iClassNum, 0.0);

	double dSum = 0.0;	// the default class contributes its constant 1.0 inside the loop below
	for (int i=0; i<iClassNum; i++)
	{
		ClassProbVec.at(i) = CalcFuncOutByFeaVec (FeaValNodeVec, i);
		dSum += ClassProbVec.at(i);
	}

	double dMaxProb = 0.0;
	int iClassMaxProb = -1;
	for (int i=0; i<iClassNum; i++)
	{
		ClassProbVec.at(i) /= dSum;
		if (ClassProbVec.at(i) > dMaxProb)
		{
			dMaxProb = ClassProbVec.at(i);	// remember the best probability, not just its index
			iClassMaxProb = i;
		}
	}

	return iClassMaxProb;
}
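
To see the connection to softmax explicitly: the default class's constant output 1.0 can be read as exp(0 \cdot x), i.e. an implicit all-zero weight vector \theta_K = 0, so the normalization above is

	P(y = i \mid x) = \frac{\exp(\theta_i \cdot x)}{\sum_{j=1}^{K} \exp(\theta_j \cdot x)} , \qquad \theta_K = 0 ,

which is exactly the standard K-class softmax.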
The computed probabilities are thus ordinary softmax probabilities. The weight-update function:

// the update formula is : theta_i_new = theta_i_old - dLearningRate * (P(i|x) - 1{y == i}) * x
double LogisticRegression::UpdateThetaMatrix (Sample & theSample, vector<double> & ClassProbVec, double dLearningRate)
{
	double dCost = 0.0;
	for (int i=0; i<iClassNum-1; i++)
	{
		// indicator: 1.0 if i is the sample's true class, 0.0 otherwise
		double dIndicator = (i == theSample.iClass) ? 1.0 : 0.0;

		vector<FeaValNode>::iterator p = theSample.FeaValNodeVec.begin();
		while (p != theSample.FeaValNodeVec.end())
		{
			if (p->iFeatureId < (int)ThetaMatrix[i].size())	// ignore out-of-range feature ids
			{
				double dGradient = (ClassProbVec[i] - dIndicator) * p->dValue;
				double dDelta = dGradient * dLearningRate;
				ThetaMatrix[i][p->iFeatureId] -= dDelta;
			}
			p++;
		}

		// cost = -log(P(i|x)) for the sample's true class, and -log(1.0 - P(i|x)) for the others
		if (i == theSample.iClass)
			dCost -= log (ClassProbVec[i]);
		else
			dCost -= log (1.0 - ClassProbVec[i]);
	}

	return dCost;
}
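
For reference, the update applied in the loop is the standard softmax cross-entropy gradient step

	\frac{\partial J}{\partial \theta_i} = \bigl( P(y = i \mid x) - \mathbf{1}\{ y = i \} \bigr) \, x , \qquad \theta_i \leftarrow \theta_i - \eta \, \frac{\partial J}{\partial \theta_i}

with learning rate \eta = dLearningRate. The returned dCost, which also counts the -log(1 - P) terms of the non-true classes, only drives the stop criterion and the progress printout in TrainSGDOnSampleFile, not the update itself.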

Done.


Please credit the source when reposting: http://blog.csdn.net/xceman1997/article/details/18449317
