SNAP Library 3.0, User Reference  2016-07-20 17:56:49
SNAP, a general purpose, high performance system for analysis and manipulation of large networks
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
TLogRegFit Class Reference

#include <agm.h>

Public Member Functions

 TLogRegFit ()
 
 ~TLogRegFit ()
 
PLogRegPredict CalcLogRegGradient (const TVec< TFltV > &XPt, const TFltV &yPt, const TStr &PlotNm=TStr(), const double &ChangeEps=0.01, const int &MaxStep=200, const bool InterceptPt=false)
 
PLogRegPredict CalcLogRegNewton (const TVec< TFltV > &XPt, const TFltV &yPt, const TStr &PlotNm=TStr(), const double &ChangeEps=0.01, const int &MaxStep=200, const bool InterceptPt=false)
 
int MLEGradient (const double &ChangeEps, const int &MaxStep, const TStr PlotNm)
 
int MLENewton (const double &ChangeEps, const int &MaxStep, const TStr PlotNm)
 
double GetStepSizeByLineSearch (const TFltV &DeltaV, const TFltV &GradV, const double &Alpha, const double &Beta)
 
double Likelihood (const TFltV &NewTheta)
 
double Likelihood ()
 
void Gradient (TFltV &GradV)
 
void Hessian (TFltVV &HVV)
 
void GetNewtonStep (TFltVV &HVV, const TFltV &GradV, TFltV &DeltaLV)
 

Private Attributes

TVec< TFltVX
 
TFltV Y
 
TFltV Theta
 
int M
 

Detailed Description

Definition at line 167 of file agm.h.

Constructor & Destructor Documentation

TLogRegFit::TLogRegFit ( )
inline

Definition at line 174 of file agm.h.

174 {}
TLogRegFit::~TLogRegFit ( )
inline

Definition at line 175 of file agm.h.

175 {}

Member Function Documentation

PLogRegPredict TLogRegFit::CalcLogRegGradient ( const TVec< TFltV > &  XPt,
const TFltV yPt,
const TStr PlotNm = TStr(),
const double &  ChangeEps = 0.01,
const int &  MaxStep = 200,
const bool  InterceptPt = false 
)

Definition at line 901 of file agm.cpp.

901  {
902  X = XPt;
903  Y = yPt;
904  IAssert(X.Len() == Y.Len());
905  if (Intercept == false) { // if intercept is not included, add it
906  for (int r = 0; r < X.Len(); r++) { X[r].Add(1); }
907  }
908  M = X[0].Len();
909  for (int r = 0; r < X.Len(); r++) { IAssert(X[r].Len() == M); }
910  for (int r = 0; r < Y.Len(); r++) {
911  if (Y[r] >= 0.99999) { Y[r] = 0.99999; }
912  if (Y[r] <= 0.00001) { Y[r] = 0.00001; }
913  }
914  Theta.Gen(M);
915  MLEGradient(ChangeEps, MaxStep, PlotNm);
916  return new TLogRegPredict(Theta);
917 };
TVec< TFltV > X
Definition: agm.h:169
#define IAssert(Cond)
Definition: bd.h:262
TSizeTy Len() const
Returns the number of elements in the vector.
Definition: ds.h:547
TFltV Y
Definition: agm.h:170
TFltV Theta
Definition: agm.h:171
int M
Definition: agm.h:172
int MLEGradient(const double &ChangeEps, const int &MaxStep, const TStr PlotNm)
Definition: agm.cpp:797
void Gen(const TSizeTy &_Vals)
Constructs a vector (an array) of _Vals elements.
Definition: ds.h:495
PLogRegPredict TLogRegFit::CalcLogRegNewton ( const TVec< TFltV > &  XPt,
const TFltV yPt,
const TStr PlotNm = TStr(),
const double &  ChangeEps = 0.01,
const int &  MaxStep = 200,
const bool  InterceptPt = false 
)

Definition at line 882 of file agm.cpp.

882  {
883 
884  X = XPt;
885  Y = yPt;
886  IAssert(X.Len() == Y.Len());
887  if (Intercept == false) { // if intercept is not included, add it
888  for (int r = 0; r < X.Len(); r++) { X[r].Add(1); }
889  }
890  M = X[0].Len();
891  for (int r = 0; r < X.Len(); r++) { IAssert(X[r].Len() == M); }
892  for (int r = 0; r < Y.Len(); r++) {
893  if (Y[r] >= 0.99999) { Y[r] = 0.99999; }
894  if (Y[r] <= 0.00001) { Y[r] = 0.00001; }
895  }
896  Theta.Gen(M);
897  MLENewton(ChangeEps, MaxStep, PlotNm);
898  return new TLogRegPredict(Theta);
899 };
TVec< TFltV > X
Definition: agm.h:169
#define IAssert(Cond)
Definition: bd.h:262
TSizeTy Len() const
Returns the number of elements in the vector.
Definition: ds.h:547
TFltV Y
Definition: agm.h:170
TFltV Theta
Definition: agm.h:171
int M
Definition: agm.h:172
int MLENewton(const double &ChangeEps, const int &MaxStep, const TStr PlotNm)
Definition: agm.cpp:770
void Gen(const TSizeTy &_Vals)
Constructs a vector (an array) of _Vals elements.
Definition: ds.h:495
void TLogRegFit::GetNewtonStep ( TFltVV HVV,
const TFltV GradV,
TFltV DeltaLV 
)

Definition at line 718 of file agm.cpp.

718  {
719  bool HSingular = false;
720  for (int i = 0; i < HVV.GetXDim(); i++) {
721  if (HVV(i,i) == 0.0) {
722  HVV(i,i) = 0.001;
723  HSingular = true;
724  }
725  DeltaLV[i] = GradV[i] / HVV(i, i);
726  }
727  if (! HSingular) {
728  if (HVV(0, 0) < 0) { // if Hessian is negative definite, convert it to positive definite
729  for (int r = 0; r < Theta.Len(); r++) {
730  for (int c = 0; c < Theta.Len(); c++) {
731  HVV(r, c) = - HVV(r, c);
732  }
733  }
734  TNumericalStuff::SolveSymetricSystem(HVV, GradV, DeltaLV);
735  }
736  else {
737  TNumericalStuff::SolveSymetricSystem(HVV, GradV, DeltaLV);
738  for (int i = 0; i < DeltaLV.Len(); i++) {
739  DeltaLV[i] = - DeltaLV[i];
740  }
741  }
742 
743  }
744 }
TSizeTy Len() const
Returns the number of elements in the vector.
Definition: ds.h:547
int GetXDim() const
Definition: ds.h:2184
TFltV Theta
Definition: agm.h:171
static void SolveSymetricSystem(TFltVV &A, const TFltV &b, TFltV &x)
Definition: linalg.cpp:837
double TLogRegFit::GetStepSizeByLineSearch ( const TFltV DeltaV,
const TFltV GradV,
const double &  Alpha,
const double &  Beta 
)

Definition at line 837 of file agm.cpp.

837  {
838  double StepSize = 1.0;
839  double InitLikelihood = Likelihood();
840  IAssert(Theta.Len() == DeltaV.Len());
841  TFltV NewThetaV(Theta.Len());
842  double MinVal = -1e10, MaxVal = 1e10;
843  for(int iter = 0; ; iter++) {
844  for (int i = 0; i < Theta.Len(); i++){
845  NewThetaV[i] = Theta[i] + StepSize * DeltaV[i];
846  if (NewThetaV[i] < MinVal) { NewThetaV[i] = MinVal; }
847  if (NewThetaV[i] > MaxVal) { NewThetaV[i] = MaxVal; }
848  }
849  if (Likelihood(NewThetaV) < InitLikelihood + Alpha * StepSize * TLinAlg::DotProduct(GradV, DeltaV)) {
850  StepSize *= Beta;
851  } else {
852  break;
853  }
854  }
855  return StepSize;
856 }
#define IAssert(Cond)
Definition: bd.h:262
TSizeTy Len() const
Returns the number of elements in the vector.
Definition: ds.h:547
double Likelihood()
Definition: agm.h:183
TFltV Theta
Definition: agm.h:171
static double DotProduct(const TFltV &x, const TFltV &y)
Definition: linalg.cpp:165
void TLogRegFit::Gradient ( TFltV GradV)

Definition at line 869 of file agm.cpp.

869  {
870  TFltV OutV;
872  GradV.Gen(M);
873  for (int r = 0; r < X.Len(); r++) {
874  //printf("Y[%d] = %f, Out[%d] = %f\n", r, Y[r].Val, r, OutV[r].Val);
875  for (int m = 0; m < M; m++) {
876  GradV[m] += (Y[r] - OutV[r]) * X[r][m];
877  }
878  }
879  //for (int m = 0; m < M; m++) { printf("Theta[%d] = %f, GradV[%d] = %f\n", m, Theta[m].Val, m, GradV[m].Val); }
880 }
TVec< TFltV > X
Definition: agm.h:169
static void GetCfy(const TVec< TFltV > &X, TFltV &OutV, const TFltV &NewTheta)
Definition: agm.cpp:933
TFltV Y
Definition: agm.h:170
TFltV Theta
Definition: agm.h:171
int M
Definition: agm.h:172
void Gen(const TSizeTy &_Vals)
Constructs a vector (an array) of _Vals elements.
Definition: ds.h:495
void TLogRegFit::Hessian ( TFltVV HVV)

Definition at line 746 of file agm.cpp.

746  {
747  HVV.Gen(Theta.Len(), Theta.Len());
748  TFltV OutV;
750  for (int i = 0; i < X.Len(); i++) {
751  for (int r = 0; r < Theta.Len(); r++) {
752  HVV.At(r, r) += - (X[i][r] * OutV[i] * (1 - OutV[i]) * X[i][r]);
753  for (int c = r + 1; c < Theta.Len(); c++) {
754  HVV.At(r, c) += - (X[i][r] * OutV[i] * (1 - OutV[i]) * X[i][c]);
755  HVV.At(c, r) += - (X[i][r] * OutV[i] * (1 - OutV[i]) * X[i][c]);
756  }
757  }
758  }
759  /*
760  printf("\n");
761  for (int r = 0; r < Theta.Len(); r++) {
762  for (int c = 0; c < Theta.Len(); c++) {
763  printf("%f\t", HVV.At(r, c).Val);
764  }
765  printf("\n");
766  }
767  */
768 }
TVec< TFltV > X
Definition: agm.h:169
static void GetCfy(const TVec< TFltV > &X, TFltV &OutV, const TFltV &NewTheta)
Definition: agm.cpp:933
TSizeTy Len() const
Returns the number of elements in the vector.
Definition: ds.h:547
TFltV Theta
Definition: agm.h:171
void Gen(const int &_XDim, const int &_YDim)
Definition: ds.h:2181
const TVal & At(const int &X, const int &Y) const
Definition: ds.h:2190
double TLogRegFit::Likelihood ( const TFltV NewTheta)

Definition at line 858 of file agm.cpp.

858  {
859  TFltV OutV;
860  TLogRegPredict::GetCfy(X, OutV, NewTheta);
861  double L = 0;
862  for (int r = 0; r < OutV.Len(); r++) {
863  L += Y[r] * log(OutV[r]);
864  L += (1 - Y[r]) * log(1 - OutV[r]);
865  }
866  return L;
867 }
TVec< TFltV > X
Definition: agm.h:169
static void GetCfy(const TVec< TFltV > &X, TFltV &OutV, const TFltV &NewTheta)
Definition: agm.cpp:933
TSizeTy Len() const
Returns the number of elements in the vector.
Definition: ds.h:547
TFltV Y
Definition: agm.h:170
double TLogRegFit::Likelihood ( )
inline

Definition at line 183 of file agm.h.

183 { return Likelihood(Theta); }
double Likelihood()
Definition: agm.h:183
TFltV Theta
Definition: agm.h:171
int TLogRegFit::MLEGradient ( const double &  ChangeEps,
const int &  MaxStep,
const TStr  PlotNm 
)

Definition at line 797 of file agm.cpp.

797  {
798  TExeTm ExeTm;
799  TFltV GradV(Theta.Len());
800  int iter = 0;
801  TIntFltPrV IterLV, IterGradNormV;
802  double MinVal = -1e10, MaxVal = 1e10;
803  double GradCutOff = 100000;
804  for(iter = 0; iter < MaxStep; iter++) {
805  Gradient(GradV); //if gradient is going out of the boundary, cut off
806  for(int i = 0; i < Theta.Len(); i++) {
807  if (GradV[i] < -GradCutOff) { GradV[i] = -GradCutOff; }
808  if (GradV[i] > GradCutOff) { GradV[i] = GradCutOff; }
809  if (Theta[i] <= MinVal && GradV[i] < 0) { GradV[i] = 0.0; }
810  if (Theta[i] >= MaxVal && GradV[i] > 0) { GradV[i] = 0.0; }
811  }
812  double Alpha = 0.15, Beta = 0.9;
813  //double LearnRate = 0.1 / (0.1 * iter + 1); //GetStepSizeByLineSearch(GradV, GradV, Alpha, Beta);
814  double LearnRate = GetStepSizeByLineSearch(GradV, GradV, Alpha, Beta);
815  if (TLinAlg::Norm(GradV) < ChangeEps) { break; }
816  for(int i = 0; i < Theta.Len(); i++) {
817  double Change = LearnRate * GradV[i];
818  Theta[i] += Change;
819  if(Theta[i] < MinVal) { Theta[i] = MinVal;}
820  if(Theta[i] > MaxVal) { Theta[i] = MaxVal;}
821  }
822  if (! PlotNm.Empty()) {
823  double L = Likelihood();
824  IterLV.Add(TIntFltPr(iter, L));
825  IterGradNormV.Add(TIntFltPr(iter, TLinAlg::Norm(GradV)));
826  }
827 
828  }
829  if (! PlotNm.Empty()) {
830  TGnuPlot::PlotValV(IterLV, PlotNm + ".likelihood_Q");
831  TGnuPlot::PlotValV(IterGradNormV, PlotNm + ".gradnorm_Q");
832  printf("MLE for Lambda completed with %d iterations(%s)\n",iter,ExeTm.GetTmStr());
833  }
834  return iter;
835 }
static double Norm(const TFltV &x)
Definition: linalg.cpp:324
Definition: tm.h:355
TSizeTy Len() const
Returns the number of elements in the vector.
Definition: ds.h:547
double Likelihood()
Definition: agm.h:183
TPair< TInt, TFlt > TIntFltPr
Definition: ds.h:87
const char * GetTmStr() const
Definition: tm.h:370
TFltV Theta
Definition: agm.h:171
bool Empty() const
Definition: dt.h:488
double GetStepSizeByLineSearch(const TFltV &DeltaV, const TFltV &GradV, const double &Alpha, const double &Beta)
Definition: agm.cpp:837
void Gradient(TFltV &GradV)
Definition: agm.cpp:869
TSizeTy Add()
Adds a new element at the end of the vector, after its current last element.
Definition: ds.h:574
static void PlotValV(const TVec< TVal1 > &ValV, const TStr &OutFNmPref, const TStr &Desc="", const TStr &XLabel="", const TStr &YLabel="", const TGpScaleTy &ScaleTy=gpsAuto, const bool &PowerFit=false, const TGpSeriesTy &SeriesTy=gpwLinesPoints)
Definition: gnuplot.h:398
int TLogRegFit::MLENewton ( const double &  ChangeEps,
const int &  MaxStep,
const TStr  PlotNm 
)

Definition at line 770 of file agm.cpp.

770  {
771  TExeTm ExeTm;
772  TFltV GradV(Theta.Len()), DeltaLV(Theta.Len());
773  TFltVV HVV(Theta.Len(), Theta.Len());
774  int iter = 0;
775  double MinVal = -1e10, MaxVal = 1e10;
776  for(iter = 0; iter < MaxStep; iter++) {
777  Gradient(GradV);
778  Hessian(HVV);
779  GetNewtonStep(HVV, GradV, DeltaLV);
780  double Increment = TLinAlg::DotProduct(GradV, DeltaLV);
781  if (Increment <= ChangeEps) {break;}
782  double LearnRate = GetStepSizeByLineSearch(DeltaLV, GradV, 0.15, 0.5);//InitLearnRate/double(0.01*(double)iter + 1);
783  for(int i = 0; i < Theta.Len(); i++) {
784  double Change = LearnRate * DeltaLV[i];
785  Theta[i] += Change;
786  if(Theta[i] < MinVal) { Theta[i] = MinVal;}
787  if(Theta[i] > MaxVal) { Theta[i] = MaxVal;}
788  }
789  }
790  if (! PlotNm.Empty()) {
791  printf("MLE with Newton method completed with %d iterations(%s)\n",iter,ExeTm.GetTmStr());
792  }
793 
794  return iter;
795 }
void GetNewtonStep(TFltVV &HVV, const TFltV &GradV, TFltV &DeltaLV)
Definition: agm.cpp:718
Definition: tm.h:355
TSizeTy Len() const
Returns the number of elements in the vector.
Definition: ds.h:547
const char * GetTmStr() const
Definition: tm.h:370
TFltV Theta
Definition: agm.h:171
Definition: ds.h:2157
void Hessian(TFltVV &HVV)
Definition: agm.cpp:746
bool Empty() const
Definition: dt.h:488
double GetStepSizeByLineSearch(const TFltV &DeltaV, const TFltV &GradV, const double &Alpha, const double &Beta)
Definition: agm.cpp:837
void Gradient(TFltV &GradV)
Definition: agm.cpp:869
static double DotProduct(const TFltV &x, const TFltV &y)
Definition: linalg.cpp:165

Member Data Documentation

int TLogRegFit::M
private

Definition at line 172 of file agm.h.

TFltV TLogRegFit::Theta
private

Definition at line 171 of file agm.h.

TVec<TFltV> TLogRegFit::X
private

Definition at line 169 of file agm.h.

TFltV TLogRegFit::Y
private

Definition at line 170 of file agm.h.


The documentation for this class was generated from the following files: