36 GraphAttributes(GraphAttributes), K(K), Lambda(Lambda) {
39 for (
int k = 0; k <
K; k++) {
40 for (
int f = 0; f < GraphAttributes->
NFeatures; f++) {
106 if (lHat.
Len() == 0) {
111 if (lHat.
Len() == 0) {
117 TInt TruePositives = 0;
118 TInt FalsePositives = 0;
119 TInt FalseNegatives = 0;
124 if (!lHat.
IsKey(c)) {
131 LabelLoss += 0.5/l.
Len();
145 LabelLoss += 0.5/(N - l.
Len());
153 if ((lHat.
Len() == 0 || TruePositives == 0) && Which ==
fScore) {
156 TFlt precision = (1.0*TruePositives)/lHat.
Len();
157 TFlt recall = (1.0*TruePositives)/l.
Len();
159 return 1 - 2 * (precision*recall) / (precision + recall);
195 const char* GroundTruthPath) :
198 FILE* f = fopen(NodeFeaturePath,
"r");
201 fscanf(f,
"%d %d", &NNodes, &nF);
204 for (
int i = 0; i < NNodes; i++) {
206 fscanf(f,
"%d", &nid);
209 printf(
"Warning: %d is not a node in G.\n", nid);
212 for (
int x = 0; x < nF; x++) {
225 f = fopen(GroundTruthPath,
"r");
227 printf(
"Groundtruth file %s not found.\n", GroundTruthPath);
230 char* CircleName =
new char [1000];
231 while (fscanf(f,
"%s", CircleName) == 1)
236 fscanf(f,
"%d", &nid);
241 if (c ==
'\n')
break;
242 if (c >=
'0' && c <=
'9') {
247 if (c ==
'\n')
break;
251 delete [] CircleName;
263 TInt k = it.GetKey();
266 diff = abs(it.GetDat() -
NodeFeatures.GetDat(nj).GetDat(k));
268 diff = abs(it.GetDat());
276 TInt k = it.GetKey();
279 diff = abs(it.GetDat() -
NodeFeatures.GetDat(ni).GetDat(k));
281 diff = abs(it.GetDat());
297 for (
int OuterRep = 0; OuterRep < OuterReps; OuterRep++) {
300 for (
int k = 0; k <
K; k++) {
301 if (OuterRep == 0 ||
CHat[k].Empty() ||
CHat[k].Len()
318 for (
int k = 0; k <
K; k++) {
325 for (
int gradientRep = 0; gradientRep < GradientReps; gradientRep++) {
335 if (ll < llPrevious) {
344 printf(
"\nIteration %d, ll = %f\n", OuterRep + 1, (
double) ll);
352 res += it.GetDat() * Parameter[it.GetKey()];
387 for (
int l = 0; l <
K; l++) {
391 TFlt d = (
CHat[l].IsKey(Src) &&
CHat[l].IsKey(Dst)) ? 1 : -1;
399 CostNotInclude = -Other + InnerProduct + log(1 + exp(Other - InnerProduct));
400 CostInclude = -Other - InnerProduct + log(1 + exp(Other + InnerProduct));
402 CostNotInclude = log(1 + exp(Other - InnerProduct));
403 CostInclude = log(1 + exp(Other + InnerProduct));
406 CostNotIncludeHash.
AddDat(kv) = -CostNotInclude;
407 CostIncludeHash.
AddDat(kv) = -CostInclude;
411 TFlt InitialTemperature = 1.0;
412 for (
int r = 2; r < MCMCReps + 2; r++) {
413 TFlt Temperature = InitialTemperature / log((
double) r);
427 if (NewLabel[np] == 0) {
432 l1 += CostIncludeHash.
GetDat(kv);
435 TFlt LogLikelihoodDiff = exp(l1 - l0);
436 TFlt AcceptProb = pow(LogLikelihoodDiff, 1.0 / Temperature);
467 TFlt InnerProduct = 0;
468 TIntPr Edge = it.GetKey();
472 for (
int k = 0; k <
K; k++) {
473 TFlt d =
CHat[k].IsKey(Src) &&
CHat[k].IsKey(Dst) ? 1 : -1;
476 TFlt expinp = exp(InnerProduct);
477 TFlt q = expinp / (1 + expinp);
482 for (
int k = 0; k <
K; k++) {
484 TFlt d = d_ ? 1 : -1;
486 !itf.IsEnd(); itf++) {
487 TInt i = itf.GetKey();
488 TInt f = itf.GetDat();
503 TFlt InnerProduct = 0;
504 TIntPr Edge = it.GetKey();
509 for (
int k = 0; k <
K; k++) {
510 TFlt d =
CHat[k].IsKey(Src) &&
CHat[k].IsKey(Dst) ? 1 : -1;
516 TFlt ll_ = log(1 + exp(InnerProduct));
521 printf(
"ll isnan\n");
TPair< TInt, TInt > TIntPr
TIntSet MCMC(TInt k, TInt MCMCReps)
Optimize the cluster assignments for the k'th cluster.
TSizeTy Len() const
Returns the number of elements in the vector.
TCluster(PGraphAttributes GraphAttributes, TInt K, TFlt Lambda)
const TDat & GetDat(const TKey &Key) const
TVec< TIntSet > GetCircles(void)
bool IsKey(const TKey &Key) const
TPt< TGraphAttributes > PGraphAttributes
void Train(TInt OuterReps, TInt GradientReps, TInt MCMCReps)
Train the model to predict K Clusters.
THash< TInt, TIntIntH > NodeFeatures
const TDat & GetDat() const
bool IsEnd() const
Tests whether the iterator is pointing to the past-end element.
THash< TIntPr, TIntIntH > EdgeFeatures
TGraphAttributes(PUNGraph G, const char *nodeFeaturePath, const char *groundtruthPath)
int AddKey(const TKey &Key)
TVec< TIntSet > GroundTruth
TFlt Inner(TIntIntH &Feature, TFlt *Parameter)
Inner product for sparse features.
void Gradient()
Update partial derivatives of log-likelihood.
bool IsNode(const int &NId) const
Tests whether ID NId is a node.
int GetUniDevInt(const int &Range=0)
bool IsEdge(const int &SrcNId, const int &DstNId) const
Tests whether an edge between node IDs SrcNId and DstNId exists in the graph.
TSizeTy Add()
Adds a new element at the end of the vector, after its current last element.
TFlt Loss(TIntSet &l, TIntSet lHat, int N, int Which)
Compute the loss between a GroundTruth cluster l and a predicted cluster lHat.
TDat & AddDat(const TKey &Key)
TFlt LogLikelihood()
Compute the log-likelihood of Parameters and cluster assignments.
PGraphAttributes GraphAttributes