SNAP Library 6.0, Developer Reference  2020-12-09 16:24:20
SNAP, a general purpose, high performance system for analysis and manipulation of large networks
table.h
Go to the documentation of this file.
1 #ifndef TABLE_H
2 #define TABLE_H
3 
5 typedef enum {NOT, AND, OR, NOP} TPredOp;
7 typedef enum {LT = 0, LTE, EQ, NEQ, GTE, GT, SUBSTR, SUPERSTR} TPredComp;
8 
9 class TAtomicPredicate;
10 class TPredicateNode;
11 class TPredicate;
12 
13 //#//////////////////////////////////////////////
16  private:
25  // OP RS: 2014/03/25, NonAtom does not work with Snap.py
26  //protected:
27  //static const TAtomicPredicate NonAtom;
28  public:
30  TAtomicPredicate() : Type(atInt), IsConst(true),
31  Compare(EQ), Lvar(""), Rvar(""),
32  IntConst(0), FltConst(0), StrConst("") {}
33  //TAtomicPredicate() : Type(NonAtom.Type), IsConst(NonAtom.IsConst),
34  // Compare(NonAtom.Compare), Lvar(NonAtom.Lvar), Rvar(NonAtom.Rvar),
35  // IntConst(NonAtom.IntConst), FltConst(NonAtom.FltConst), StrConst(NonAtom.StrConst) {}
38  TInt ICnst, TFlt FCnst, TStr SCnst) : Type(Typ), IsConst(IsCnst),
39  Compare(Cmp), Lvar(L), Rvar(R), IntConst(ICnst), FltConst(FCnst),
40  StrConst(SCnst) {}
43  Type(Typ), IsConst(IsCnst), Compare(Cmp), Lvar(L), Rvar(R), IntConst(0),
44  FltConst(0), StrConst("") {}
45  friend class TPredicate;
46  friend class TPredicateNode;
47 };
48 
49 //#//////////////////////////////////////////////
52  public:
59  TPredicateNode(): Op(NOP), Result(false), Atom(), Parent(NULL), Left(NULL),
61  Right(NULL) {}
63  TPredicateNode(const TAtomicPredicate& A): Op(NOP), Result(false), Atom(A),
64  Parent(NULL), Left(NULL), Right(NULL) {}
66  TPredicateNode(TPredOp Opr): Op(Opr), Result(false), Atom(), Parent(NULL),
67  Left(NULL), Right(NULL) {}
69  TPredicateNode(const TPredicateNode& P): Op(P.Op), Result(P.Result), Atom(P.Atom),
70  Parent(P.Parent), Left(P.Left), Right(P.Right) {}
72  void AddLeftChild(TPredicateNode* Child) { Left = Child; Child->Parent = this; }
74  void AddRightChild(TPredicateNode* Child) { Right = Child; Child->Parent = this; }
76  void GetVariables(TStrV& Variables);
77  friend class TPredicate;
78 };
79 
80 //#//////////////////////////////////////////////
82 class TPredicate {
83  protected:
88  public:
90  TPredicate() : IntVars(), FltVars(), StrVars() {}
92  TPredicate(TPredicateNode* R) : IntVars(), FltVars(), StrVars(), Root(R) {}
94  TPredicate(const TPredicate& Pred) : IntVars(Pred.IntVars), FltVars(Pred.FltVars), StrVars(Pred.StrVars), Root(Pred.Root) {}
96  void GetVariables(TStrV& Variables);
98  void SetIntVal(TStr VarName, TInt VarVal) { IntVars.AddDat(VarName, VarVal); }
100  void SetFltVal(TStr VarName, TFlt VarVal) { FltVars.AddDat(VarName, VarVal); }
102  void SetStrVal(TStr VarName, TStr VarVal) { StrVars.AddDat(VarName, VarVal); }
104  TBool Eval();
107 
109  template <class T>
110  static TBool EvalAtom(T Val1, T Val2, TPredComp Cmp) {
111  switch (Cmp) {
112  case LT: return Val1 < Val2;
113  case LTE: return Val1 <= Val2;
114  case EQ: return Val1 == Val2;
115  case NEQ: return Val1 != Val2;
116  case GTE: return Val1 >= Val2;
117  case GT: return Val1 > Val2;
118  default: return false;
119  }
120  };
121 
123  static TBool EvalStrAtom(const TStr& Val1, const TStr& Val2, TPredComp Cmp) {
124  switch (Cmp) {
125  case LT: return Val1 < Val2;
126  case LTE: return Val1 <= Val2;
127  case EQ: return Val1 == Val2;
128  case NEQ: return Val1 != Val2;
129  case GTE: return Val1 >= Val2;
130  case GT: return Val1 > Val2;
131  case SUBSTR: return Val2.IsStrIn(Val1);
132  case SUPERSTR: return Val1.IsStrIn(Val2);
133  default: return false;
134  }
135  }
136 };
137 
138 //#//////////////////////////////////////////////
140 class TTable;
142 typedef TPt<TTable> PTable;
143 
146 
148 // Haversine distance is used to calculate distance between two points on a sphere based on latitude and longitude
150 
151 #if 0
152 // TMetric and TEuclideanMetric are currently not used, kept for future use
153 //#//////////////////////////////////////////////
155 class TMetric {
156 protected:
157  TStr MetricName;
158 public:
159  TMetric(TStr Name) : MetricName(Name) {}
161  TStr GetName();
163  virtual TFlt NumDist(TFlt,TFlt) { return -1; }
165  virtual TFlt StrDist(TStr,TStr) { return -1; }
166 };
167 
168 //#//////////////////////////////////////////////
170 class TEuclideanMetric: public TMetric {
171 public:
172  TEuclideanMetric(TStr Name) : TMetric(Name) {}
174  TFlt NumDist(TFlt x1,TFlt x2) { return fabs(x1-x2); }
175 };
176 #endif
177 
178 //#//////////////////////////////////////////////
181 protected:
183  friend class TTable;
184 
185 public:
189  TTableContext(TSIn& SIn): StringVals(SIn) {}
191  void Load(TSIn& SIn) { StringVals.Load(SIn); }
193  void LoadShM(TShMIn& ShMIn) {
194  StringVals.LoadShM(ShMIn, true);
195  }
197  void Save(TSOut& SOut) { StringVals.Save(SOut); }
199  TInt AddStr(const TStr& Key) {
200  TInt KeyId = TInt(StringVals.AddKey(Key));
201  return(KeyId);
202  }
204  TStr GetStr(const TInt& KeyId) const {
205  return StringVals.GetKey(KeyId);
206  }
207 };
208 
209 //#//////////////////////////////////////////////
211 class TPrimitive {
212 private:
217 
218 public:
219  TPrimitive() : IntVal(-1), FltVal(-1), StrVal(""), AttrType(atInt) {}
220  TPrimitive(const TInt& Val) : IntVal(Val), FltVal(-1), StrVal(""), AttrType(atInt) {}
221  TPrimitive(const TFlt& Val) : IntVal(-1), FltVal(Val), StrVal(""), AttrType(atFlt) {}
222  TPrimitive(const TStr& Val) : IntVal(-1), FltVal(-1), StrVal(Val.CStr()), AttrType(atStr) {}
223  TPrimitive(const TPrimitive& Prim) : IntVal(Prim.IntVal), FltVal(Prim.FltVal),
224  StrVal(Prim.StrVal.CStr()), AttrType(Prim.AttrType) {}
225 
226  TInt GetInt() const { return IntVal; }
227  TFlt GetFlt() const { return FltVal; }
228  TStr GetStr() const { return StrVal; }
229  TAttrType GetType() const { return AttrType; }
230 };
231 
232 //#//////////////////////////////////////////////
234 class TTableRow {
235 protected:
239 public:
243  void AddInt(const TInt& Val) { IntVals.Add(Val); }
245  void AddFlt(const TFlt& Val) { FltVals.Add(Val); }
247  void AddStr(const TStr& Val) { StrVals.Add(Val); }
249  TIntV GetIntVals() const { return IntVals; }
251  TFltV GetFltVals() const { return FltVals; }
253  TStrV GetStrVals() const { return StrVals; }
254 };
255 
260 
263 
264 //#//////////////////////////////////////////////
266 class GroupStmt{
267 protected:
272 public:
273  GroupStmt(): GroupByAttrs(TStrV()), Ordered(true), UsePhysicalRowIds(true), Valid(true){}
274  GroupStmt(const TStrV& Attrs): GroupByAttrs(Attrs), Ordered(true), UsePhysicalRowIds(true), Valid(true){}
275  GroupStmt(const TStrV& Attrs, TBool ordered, TBool physical): GroupByAttrs(Attrs), Ordered(ordered), UsePhysicalRowIds(physical), Valid(true){}
276  GroupStmt(const GroupStmt& stmt): GroupByAttrs(stmt.GroupByAttrs), Ordered(stmt.Ordered), UsePhysicalRowIds(stmt.UsePhysicalRowIds), Valid(stmt.Valid){}
278  TBool operator ==(const GroupStmt& stmt) const{
279  if(stmt.Ordered != Ordered || stmt.UsePhysicalRowIds != UsePhysicalRowIds){ return false;}
280  if(stmt.GroupByAttrs.Len() != GroupByAttrs.Len()){ return false;}
281  for(int i = 0; i < GroupByAttrs.Len(); i++){
282  if(stmt.GroupByAttrs[i] != GroupByAttrs[i]){ return false;}
283  }
284  return true;
285  }
286  TBool IsValid(){ return Valid;}
287  void Invalidate(){ Valid = false;}
288  TBool IncludesAttr(const TStr& Attr){
289  for(int i = 0; i < GroupByAttrs.Len(); i++){
290  if(GroupByAttrs[i] == Attr){ return true;}
291  }
292  return false;
293  }
294  TSize GetMemUsed() const{
295  TSize sz = 3 * sizeof(TBool);
296  sz += GroupByAttrs.GetMemUsed();
297  for(int i = 0; i < GroupByAttrs.Len(); i++){
298  sz += GroupByAttrs[i].GetMemUsed();
299  }
300  return sz;
301  }
302 
303  int GetPrimHashCd() const{
304  int hc1 = GroupByAttrs.GetPrimHashCd();
306  int hc2 = flags.GetPrimHashCd();
307  return TPairHashImpl::GetHashCd(hc1, hc2);
308  }
309 
310  int GetSecHashCd() const{
311  int hc1 = GroupByAttrs.GetSecHashCd();
313  int hc2 = flags.GetSecHashCd();
314  return TPairHashImpl::GetHashCd(hc1, hc2);
315  }
316 
317  void Print(){
318  for(int i = 0; i < GroupByAttrs.Len(); i++){
319  printf("%s ", GroupByAttrs[i].CStr());
320  }
321  printf("Ordered: %d, UsePhysicalRows: %d, Valid: %d\n", Ordered.Val, UsePhysicalRowIds.Val, Valid.Val);
322  }
323 };
324 
325 //#//////////////////////////////////////////////
327 
332  const TTable* Table;
333 public:
335  TRowIterator(): CurrRowIdx(0), Table(NULL) {}
337  TRowIterator(TInt RowIdx, const TTable* TablePtr): CurrRowIdx(RowIdx), Table(TablePtr) {}
339  TRowIterator(const TRowIterator& RowI): CurrRowIdx(RowI.CurrRowIdx), Table(RowI.Table) {}
341  TRowIterator& operator++(int);
343  TRowIterator& Next();
345  bool operator < (const TRowIterator& RowI) const;
347  bool operator == (const TRowIterator& RowI) const;
349  TInt GetRowIdx() const;
351  TInt GetIntAttr(TInt ColIdx) const;
353  TFlt GetFltAttr(TInt ColIdx) const;
355  TStr GetStrAttr(TInt ColIdx) const;
357  TInt GetStrMapById(TInt ColIdx) const;
359  TInt GetIntAttr(const TStr& Col) const;
361  TFlt GetFltAttr(const TStr& Col) const;
363  TStr GetStrAttr(const TStr& Col) const;
365  TInt GetStrMapByName(const TStr& Col) const;
367  TBool CompareAtomicConst(TInt ColIdx, const TPrimitive& Val, TPredComp Cmp);
369  TBool CompareAtomicConstTStr(TInt ColIdx, const TStr& Val, TPredComp Cmp);
370 };
371 
372 //#//////////////////////////////////////////////
378 public:
380  TRowIteratorWithRemove(): CurrRowIdx(0), Table(NULL), Start(true) {}
382  TRowIteratorWithRemove(TInt RowIdx, TTable* TablePtr);
384  TRowIteratorWithRemove(TInt RowIdx, TTable* TablePtr, TBool IsStart) : CurrRowIdx(RowIdx),
385  Table(TablePtr), Start(IsStart) {}
387  TRowIteratorWithRemove(const TRowIteratorWithRemove& RowI) : CurrRowIdx(RowI.CurrRowIdx),
388  Table(RowI.Table), Start(RowI.Start) {}
394  bool operator < (const TRowIteratorWithRemove& RowI) const;
396  bool operator == (const TRowIteratorWithRemove& RowI) const;
398  TInt GetRowIdx() const;
400  TInt GetNextRowIdx() const;
402  TInt GetNextIntAttr(TInt ColIdx) const;
404  TFlt GetNextFltAttr(TInt ColIdx) const;
406  TStr GetNextStrAttr(TInt ColIdx) const;
408  TInt GetNextIntAttr(const TStr& Col) const;
410  TFlt GetNextFltAttr(const TStr& Col) const;
412  TStr GetNextStrAttr(const TStr& Col) const;
414  TBool IsFirst() const;
416  void RemoveNext();
418  TBool CompareAtomicConst(TInt ColIdx, const TPrimitive& Val, TPredComp Cmp);
419 };
420 
421 //#//////////////////////////////////////////////
426 public:
428  TTableIterator(TVec<PTable>& PTableV): PTableV(PTableV), CurrTableIdx(0) {}
430  PTable Next() { return PTableV[CurrTableIdx++]; }
432  bool HasNext() { return CurrTableIdx < PTableV.Len(); }
433 };
434 
436 namespace TSnap {
438  template<class PGraph> PGraph ToGraph(PTable Table,
439  const TStr& SrcCol, const TStr& DstCol, TAttrAggr AggrPolicy);
441  template<class PGraph> PGraph ToNetwork(PTable Table,
442  const TStr& SrcCol, const TStr& DstCol,
443  TStrV& SrcAttrs, TStrV& DstAttrs, TStrV& EdgeAttrs,
444  TAttrAggr AggrPolicy);
446  template<class PGraph> PGraph ToNetwork(PTable Table,
447  const TStr& SrcCol, const TStr& DstCol, TAttrAggr AggrPolicy);
448  template<class PGraph> PGraph ToNetwork(PTable Table,
449  const TStr& SrcCol, const TStr& DstCol,
450  TStrV& EdgeAttrV,
451  TAttrAggr AggrPolicy);
452  template<class PGraph> PGraph ToNetwork(PTable Table,
453  const TStr& SrcCol, const TStr& DstCol,
454  TStrV& EdgeAttrV, PTable NodeTable, const TStr& NodeCol, TStrV& NodeAttrV,
455  TAttrAggr AggrPolicy);
456  int LoadCrossNet(TCrossNet& Graph, PTable Table,
457  const TStr& SrcCol, const TStr& DstCol,
458  TStrV& EdgeAttrV);
459  int LoadMode(TModeNet& Graph, PTable Table, const TStr& NCol,
460  TStrV& NodeAttrV);
461 
462 #ifdef GCC_ATOMIC
463  template<class PGraphMP> PGraphMP ToGraphMP(PTable Table,
464  const TStr& SrcCol, const TStr& DstCol);
465  template<class PGraphMP> PGraphMP ToGraphMP3(PTable Table,
466  const TStr& SrcCol, const TStr& DstCol);
467  template<class PGraphMP> PGraphMP ToNetworkMP(PTable Table, const TStr& SrcCol, const TStr& DstCol,
468  TStrV& SrcAttrs, TStrV& DstAttrs, TStrV& EdgeAttrs, TAttrAggr AggrPolicy);
469  template<class PGraphMP> PGraphMP ToNetworkMP2(PTable Table, const TStr& SrcCol, const TStr& DstCol,
470  TStrV& SrcAttrs, TStrV& DstAttrs, TStrV& EdgeAttrs, TAttrAggr AggrPolicy);
471  template<class PGraphMP> PGraphMP ToNetworkMP(PTable Table, const TStr& SrcCol, const TStr& DstCol,
472  TStrV& EdgeAttrV, TAttrAggr AggrPolicy);
473  template<class PGraphMP> PGraphMP ToNetworkMP(PTable Table, const TStr& SrcCol, const TStr& DstCol,
474  TAttrAggr AggrPolicy);
475  template<class PGraphMP> PGraphMP ToNetworkMP(PTable Table, const TStr& SrcCol, const TStr& DstCol,
476  TStrV& EdgeAttrV, PTable NodeTable, const TStr& NodeCol, TStrV& NodeAttrV, TAttrAggr AggrPolicy);
477 
478 
479 #endif // GCC_ATOMIC
480 }
481 
482 //#//////////////////////////////////////////////
484 class TTable {
485 protected:
486  static const TInt Last;
487  static const TInt Invalid;
488 
489  static TInt UseMP;
490 public:
491  template<class PGraph> friend PGraph TSnap::ToGraph(PTable Table,
492  const TStr& SrcCol, const TStr& DstCol, TAttrAggr AggrPolicy);
493  template<class PGraph> friend PGraph TSnap::ToNetwork(PTable Table,
494  const TStr& SrcCol, const TStr& DstCol,
495  TStrV& SrcAttrs, TStrV& DstAttrs, TStrV& EdgeAttrs,
497  template<class PGraph> friend PGraph TSnap::ToNetwork(PTable Table,
498  const TStr& SrcCol, const TStr& DstCol,
500  template<class PGraph> friend PGraph TSnap::ToNetwork(PTable Table,
501  const TStr& SrcCol, const TStr& DstCol,
502  TStrV& EdgeAttrV,
504  template<class PGraph> friend PGraph TSnap::ToNetwork(PTable Table,
505  const TStr& SrcCol, const TStr& DstCol,
506  TStrV& EdgeAttrV, PTable NodeTable, const TStr& NodeCol, TStrV& NodeAttrV,
508  friend int TSnap::LoadCrossNet(TCrossNet& Graph, PTable Table,
509  const TStr& SrcCol, const TStr& DstCol,
510  TStrV& EdgeAttrV);
511  friend int TSnap::LoadMode(TModeNet& Graph, PTable Table,
512  const TStr& NCol, TStrV& NodeAttrV);
513 
514 #ifdef GCC_ATOMIC
515  template<class PGraphMP> friend PGraphMP TSnap::ToGraphMP(PTable Table, const TStr& SrcCol, const TStr& DstCol);
516  template<class PGraphMP> friend PGraphMP TSnap::ToGraphMP3(PTable Table, const TStr& SrcCol, const TStr& DstCol);
517  template<class PGraphMP> friend PGraphMP TSnap::ToNetworkMP(PTable Table, const TStr& SrcCol, const TStr& DstCol, TStrV& SrcAttrs, TStrV& DstAttrs, TStrV& EdgeAttrs, TAttrAggr AggrPolicy);
518  template<class PGraphMP> friend PGraphMP TSnap::ToNetworkMP2(PTable Table, const TStr& SrcCol, const TStr& DstCol, TStrV& SrcAttrs, TStrV& DstAttrs, TStrV& EdgeAttrs, TAttrAggr AggrPolicy);
519  template<class PGraphMP> friend PGraphMP TSnap::ToNetworkMP(PTable Table, const TStr& SrcCol, const TStr& DstCol, TStrV& EdgeAttrV, TAttrAggr AggrPolicy);
520  template<class PGraphMP> friend PGraphMP TSnap::ToNetworkMP(PTable Table, const TStr& SrcCol, const TStr& DstCol, TAttrAggr AggrPolicy);
521  template<class PGraphMP> friend PGraphMP TSnap::ToNetworkMP(PTable Table, const TStr& SrcCol, const TStr& DstCol,
522  TStrV& EdgeAttrV, PTable NodeTable, const TStr& NodeCol, TStrV& NodeAttrV, TAttrAggr AggrPolicy);
523 
524 #endif // GCC_ATOMIC
525 
526  static void SetMP(TInt Value) { UseMP = Value; }
527  static TInt GetMP() { return UseMP; }
528 
530  static TStr NormalizeColName(const TStr& ColName) {
531  TStr Result = ColName;
532  int RLen = Result.Len();
533  if (RLen == 0) { return Result; }
534  if (Result.GetCh(0) == '_') { return Result; }
535  if (RLen >= 2 && Result.GetCh(RLen-2) == '-') { return Result; }
536  return Result + "-1";
537  }
539  static TStrV NormalizeColNameV(const TStrV& Cols) {
540  TStrV NCols;
541  for (TInt i = 0; i < Cols.Len(); i++) { NCols.Add(NormalizeColName(Cols[i])); }
542  return NCols;
543  }
544 protected:
546 
556 
561 
567 
571 
572  // Group mapping data structures.
574 
578 
582 
585  void InvalidatePhysicalGroupings(); // to be called when rows are added / physically removed
586  void InvalidateAffectedGroupings(const TStr& Attr); // to be called when attributes are removed (projected) or values updated in-place
587 
588  // Fields to be used when constructing a graph.
595 
602 
604 
605 /***** Utility functions *****/
606 public:
608  void AddIntCol(const TStr& ColName);
610  void AddFltCol(const TStr& ColName);
612  void AddStrCol(const TStr& ColName);
613 protected:
615  void IncrementNext();
617  void ClassifyAux(const TIntV& SelectedRows, const TStr& LabelName,
618  const TInt& PositiveLabel = 1, const TInt& NegativeLabel= 0);
619 
620 /***** Utility functions for handling string values *****/
622  const char* GetContextKey(TInt Val) const {
623  return Context->StringVals.GetKey(Val);
624  }
626  TStr GetStrValIdx(TInt ColIdx, TInt RowIdx) const {
627  return TStr(Context->StringVals.GetKey(StrColMaps[ColIdx][RowIdx]));
628  }
630  void AddStrVal(const TInt& ColIdx, const TStr& Val);
632  void AddStrVal(const TStr& Col, const TStr& Val);
633 
634 /***** Utility functions for handling Schema *****/
636  TStr GetIdColName() const { return IdColName; }
638  TStr GetSchemaColName(TInt Idx) const { return Sch[Idx].Val1; }
640  TAttrType GetSchemaColType(TInt Idx) const { return Sch[Idx].Val2; }
642  void AddSchemaCol(const TStr& ColName, TAttrType ColType) {
643  TStr NColName = NormalizeColName(ColName);
644  Sch.Add(TPair<TStr,TAttrType>(NColName, ColType));
645  }
646  TBool IsColName(const TStr& ColName) const {
647  TStr NColName = NormalizeColName(ColName);
648  return ColTypeMap.IsKey(NColName);
649  }
651  void AddColType(const TStr& ColName, TPair<TAttrType,TInt> ColType) {
652  TStr NColName = NormalizeColName(ColName);
653  ColTypeMap.AddDat(NColName, ColType);
654  }
656  void AddColType(const TStr& ColName, TAttrType ColType, TInt Index) {
657  TStr NColName = NormalizeColName(ColName);
658  AddColType(NColName, TPair<TAttrType,TInt>(ColType, Index));
659  }
661  void DelColType(const TStr& ColName) {
662  TStr NColName = NormalizeColName(ColName);
663  ColTypeMap.DelKey(NColName);
664  }
666  TPair<TAttrType, TInt> GetColTypeMap(const TStr& ColName) const {
667  TStr NColName = NormalizeColName(ColName);
668  return ColTypeMap.GetDat(NColName);
669  }
671  TStr RenumberColName(const TStr& ColName) const;
673  TStr DenormalizeColName(const TStr& ColName) const;
675  Schema DenormalizeSchema() const;
677  TBool IsAttr(const TStr& Attr);
678 
679 /***** Utility functions for adding rows and tables to TTable *****/
681  void AddTable(const TTable& T);
683  void ConcatTable(const PTable& T) {AddTable(*T); Reindex(); }
684 
686  void AddRowI(const TRowIterator& RI);
688  void AddRowV(const TIntV& IntVals, const TFltV& FltVals, const TStrV& StrVals);
689 
690 /***** Utility functions for building graph from TTable *****/
692  void AddGraphAttribute(const TStr& Attr, TBool IsEdge, TBool IsSrc, TBool IsDst);
694  void AddGraphAttributeV(TStrV& Attrs, TBool IsEdge, TBool IsSrc, TBool IsDst);
696  void CheckAndAddIntNode(PNEANet Graph, THashSet<TInt>& NodeVals, TInt NodeId);
698  template<class T> TInt CheckAndAddFltNode(T Graph, THash<TFlt, TInt>& NodeVals, TFlt FNodeVal);
700  void AddEdgeAttributes(PNEANet& Graph, int RowId);
702  void AddNodeAttributes(TInt NId, TStrV NodeAttrV, TInt RowId,
703  THash<TInt, TStrIntVH>& NodeIntAttrs, THash<TInt, TStrFltVH>& NodeFltAttrs,
704  THash<TInt, TStrStrVH>& NodeStrAttrs);
706  PNEANet BuildGraph(const TIntV& RowIds, TAttrAggr AggrPolicy);
708  void InitRowIdBuckets(int NumBuckets);
710 
713  void FillBucketsByWindow(TStr SplitAttr, TInt JumpSize, TInt WindowSize,
714  TInt StartVal, TInt EndVal);
716 
719  void FillBucketsByInterval(TStr SplitAttr, TIntPrV SplitIntervals);
721 
725 
730 
734 
736 
739  template <class T> T AggregateVector(TVec<T>& V, TAttrAggr Policy);
740 
741  /***** Grouping Utility functions *************/
743  void GroupingSanityCheck(const TStr& GroupBy, const TAttrType& AttrType) const;
745 
749  template <class T> void GroupByIntCol(const TStr& GroupBy, T& Grouping,
750  const TIntV& IndexSet, TBool All, TBool UsePhysicalIds = true) const;
751 #ifdef GCC_ATOMIC
752  public: //Should be protected - this is for debug only
754  void GroupByIntColMP(const TStr& GroupBy, THashMP<TInt, TIntV>& Grouping, TBool UsePhysicalIds = true) const;
755 #endif // GCC_ATOMIC
756  protected:
758  template <class T> void GroupByFltCol(const TStr& GroupBy, T& Grouping,
759  const TIntV& IndexSet, TBool All, TBool UsePhysicalIds = true) const;
761  template <class T> void GroupByStrCol(const TStr& GroupBy, T& Grouping,
762  const TIntV& IndexSet, TBool All, TBool UsePhysicalIds = true) const;
764  template <class T> void UpdateGrouping(THash<T,TIntV>& Grouping, T Key, TInt Val) const;
765 #ifdef GCC_ATOMIC
766  template <class T> void UpdateGrouping(THashMP<T,TIntV>& Grouping, T Key, TInt Val) const;
768 #endif // GCC_ATOMIC
769  void PrintGrouping(const THash<TGroupKey, TIntV>& Grouping) const;
770 
771  /***** Utility functions for sorting by columns *****/
773  inline TInt CompareRows(TInt R1, TInt R2, const TAttrType& CompareByType,
774  const TInt& CompareByIndex, TBool Asc = true);
776  inline TInt CompareRows(TInt R1, TInt R2, const TVec<TAttrType>& CompareByTypes,
777  const TIntV& CompareByIndices, TBool Asc = true);
779  TInt GetPivot(TIntV& V, TInt StartIdx, TInt EndIdx, const TVec<TAttrType>& SortByTypes,
780  const TIntV& SortByIndices, TBool Asc);
782  TInt Partition(TIntV& V, TInt StartIdx, TInt EndIdx, const TVec<TAttrType>& SortByTypes,
783  const TIntV& SortByIndices, TBool Asc);
785  void ISort(TIntV& V, TInt StartIdx, TInt EndIdx, const TVec<TAttrType>& SortByTypes,
786  const TIntV& SortByIndices, TBool Asc = true);
788  void QSort(TIntV& V, TInt StartIdx, TInt EndIdx, const TVec<TAttrType>& SortByTypes,
789  const TIntV& SortByIndices, TBool Asc = true);
791  void Merge(TIntV& V, TInt Idx1, TInt Idx2, TInt Idx3, const TVec<TAttrType>& SortByTypes,
792  const TIntV& SortByIndices, TBool Asc = true);
793 #ifdef USE_OPENMP
794  void QSortPar(TIntV& V, const TVec<TAttrType>& SortByTypes, const TIntV& SortByIndices,
796  TBool Asc = true);
797 #endif // USE_OPENMP
798 
799 /***** Utility functions for removing rows (not through iterator) *****/
801  bool IsRowValid(TInt RowIdx) const{ return Next[RowIdx] != Invalid;}
805  void RemoveFirstRow();
807  void RemoveRow(TInt RowIdx, TInt PrevRowIdx);
809  void KeepSortedRows(const TIntV& KeepV);
812  for (int i = 0; i < Next.Len(); i++) {
813  if(Next[i] != TTable::Invalid) { FirstValidRow = i; return;}
814  }
815  TExcept::Throw("SetFirstValidRow: Table is empty");
816  }
817 
818 /***** Utility functions for Join *****/
820  PTable InitializeJointTable(const TTable& Table);
822  void AddJointRow(const TTable& T1, const TTable& T2, TInt RowIdx1, TInt RowIdx2);
823 /***** Utility functions for Threshold Join *****/
824  void ThresholdJoinInputCorrectness(const TStr& KeyCol1, const TStr& JoinCol1, const TTable& Table,
825  const TStr& KeyCol2, const TStr& JoinCol2);
826  void ThresholdJoinCountCollisions(const TTable& TB, const TTable& TS,
827  const TIntIntVH& T, TInt JoinColIdxB, TInt KeyColIdxB, TInt KeyColIdxS,
828  THash<TIntPr,TIntTr>& Counters, TBool ThisIsSmaller, TAttrType JoinColType, TAttrType KeyType);
829  PTable ThresholdJoinOutputTable(const THash<TIntPr,TIntTr>& Counters, TInt Threshold, const TTable& Table);
830  void ThresholdJoinCountPerJoinKeyCollisions(const TTable& TB, const TTable& TS,
831  const TIntIntVH& T, TInt JoinColIdxB, TInt KeyColIdxB, TInt KeyColIdxS,
832  THash<TIntTr,TIntTr>& Counters, TBool ThisIsSmaller, TAttrType JoinColType, TAttrType KeyType);
833  PTable ThresholdJoinPerJoinKeyOutputTable(const THash<TIntTr,TIntTr>& Counters, TInt Threshold, const TTable& Table);
834 
836  void ResizeTable(int RowCount);
838  int GetEmptyRowsStart(int NewRows);
840  void AddSelectedRows(const TTable& Table, const TIntV& RowIDs);
842  void AddNRows(int NewRows, const TVec<TIntV>& IntColsP, const TVec<TFltV>& FltColsP,
843  const TVec<TIntV>& StrColMapsP);
844 #ifdef USE_OPENMP
845  void AddNJointRowsMP(const TTable& T1, const TTable& T2, const TVec<TIntPrV>& JointRowIDSet);
847 #endif // USE_OPENMP
848  void UpdateTableForNewRow();
850 
851 #ifdef GCC_ATOMIC
852  static void LoadSSPar(PTable& NewTable, const Schema& S, const TStr& InFNm, const TIntV& RelevantCols, const char& Separator, TBool HasTitleLine);
854 #endif // GCC_ATOMIC
855  static void LoadSSSeq(PTable& NewTable, const Schema& S, const TStr& InFNm, const TIntV& RelevantCols, const char& Separator, TBool HasTitleLine);
857 
858 /***** Utility functions for Group *****/
860 
863  void GroupAux(const TStrV& GroupBy, THash<TGroupKey, TPair<TInt, TIntV> >& Grouping,
864  TBool Ordered, const TStr& GroupColName, TBool KeepUnique, TIntV& UniqueVec, TBool UsePhysicalIds = true);
865 #ifdef USE_OPENMP
866  //void GroupAuxMP(const TStrV& GroupBy, THashGenericMP<TGroupKey, TPair<TInt, TIntV> >& Grouping,
868  // TBool Ordered, const TStr& GroupColName, TBool KeepUnique, TIntV& UniqueVec, TBool UsePhysicalIds = false);
869 #endif // USE_OPENMP
870  void StoreGroupCol(const TStr& GroupColName, const TVec<TPair<TInt, TInt> >& GroupAndRowIds);
874  //template<class T> void RegisterGrouping(const T& Grouping, const TStr& GroupByCol, TBool UsePhysicalRows);
875 
877  void Reindex();
879  void AddIdColumn(const TStr& IdColName);
880 
881  static TInt CompareKeyVal(const TInt& K1, const TInt& V1, const TInt& K2, const TInt& V2);
882  static TInt CheckSortedKeyVal(TIntV& Key, TIntV& Val, TInt Start, TInt End);
883  static void ISortKeyVal(TIntV& Key, TIntV& Val, TInt Start, TInt End);
884  static TInt GetPivotKeyVal(TIntV& Key, TIntV& Val, TInt Start, TInt End);
885  static TInt PartitionKeyVal(TIntV& Key, TIntV& Val, TInt Start, TInt End);
886  static void QSortKeyVal(TIntV& Key, TIntV& Val, TInt Start, TInt End);
887 
889  void GetCollidingRows(const TTable& T, THashSet<TInt>& Collisions);
890 private:
891  class TLoadVecInit {
892  public:
894  template<typename TElem>
895  void operator() (TVec<TElem>* Node, TShMIn& ShMIn) {Node->LoadShM(ShMIn);}
896  };
897 private:
898  void GenerateColTypeMap(THash<TStr,TPair<TInt,TInt> > & ColTypeIntMap);
899  void LoadTableShM(TShMIn& ShMIn, TTableContext* ContextTable);
900 
901 
902 public:
903 /***** Constructors *****/
904  TTable();
905  TTable(TTableContext* Context);
906  TTable(const Schema& S, TTableContext* Context);
907  TTable(TSIn& SIn, TTableContext* Context);
908 
910  TTable(const THash<TInt,TInt>& H, const TStr& Col1, const TStr& Col2,
911  TTableContext* Context, const TBool IsStrKeys = false);
913  TTable(const THash<TInt,TFlt>& H, const TStr& Col1, const TStr& Col2,
914  TTableContext* Context, const TBool IsStrKeys = false);
915  // TTable(const TStr& TableName, const THash<TInt,TStr>& H, const TStr& Col1,
916  // const TStr& Col2, TTableContext* Context);
917 
919  TTable(const TTable& Table): Context(Table.Context), Sch(Table.Sch),
920  NumRows(Table.NumRows), NumValidRows(Table.NumValidRows), FirstValidRow(Table.FirstValidRow),
921  LastValidRow(Table.LastValidRow), Next(Table.Next), IntCols(Table.IntCols),
922  FltCols(Table.FltCols), StrColMaps(Table.StrColMaps), ColTypeMap(Table.ColTypeMap),
923  IdColName(Table.IdColName), RowIdMap(Table.RowIdMap), GroupStmtNames(Table.GroupStmtNames),
924  GroupIDMapping(Table.GroupIDMapping), GroupMapping(Table.GroupMapping),
925  SrcCol(Table.SrcCol), DstCol(Table.DstCol),
926  EdgeAttrV(Table.EdgeAttrV), SrcNodeAttrV(Table.SrcNodeAttrV),
927  DstNodeAttrV(Table.DstNodeAttrV), CommonNodeAttrs(Table.CommonNodeAttrs),
928  IsNextDirty(Table.IsNextDirty) {}
929 
930  TTable(const TTable& Table, const TIntV& RowIds);
931 
932  static PTable New() { return new TTable(); }
933  static PTable New(TTableContext* Context) { return new TTable(Context); }
934  static PTable New(const Schema& S, TTableContext* Context) {
935  return new TTable(S, Context);
936  }
938  static PTable New(const THash<TInt,TInt>& H, const TStr& Col1,
939  const TStr& Col2, TTableContext* Context, const TBool IsStrKeys = false) {
940  return new TTable(H, Col1, Col2, Context, IsStrKeys);
941  }
943  static PTable New(const THash<TInt,TFlt>& H, const TStr& Col1,
944  const TStr& Col2, TTableContext* Context, const TBool IsStrKeys = false) {
945  return new TTable(H, Col1, Col2, Context, IsStrKeys);
946  }
948  static PTable New(const PTable Table) { return new TTable(*Table); }
950  // static PTable New(const PTable Table, const TStr& TableName) {
951  // PTable T = New(Table); T->Name = TableName;
952  // return T;
953  // }
955  static void GetSchema(const TStr& InFNm, Schema& S, const char& Separator = '\t');
956 /***** Save / Load functions *****/
958  static PTable LoadSS(const Schema& S, const TStr& InFNm, TTableContext* Context,
959  const char& Separator = '\t', TBool HasTitleLine = false);
961  static PTable LoadSS(const Schema& S, const TStr& InFNm, TTableContext* Context,
962  const TIntV& RelevantCols, const char& Separator = '\t', TBool HasTitleLine = false);
964  void SaveSS(const TStr& OutFNm);
966  void SaveBin(const TStr& OutFNm);
968 
971  static PTable Load(TSIn& SIn, TTableContext* Context){ return new TTable(SIn, Context);}
973 
975  static PTable LoadShM(TShMIn& ShMIn, TTableContext* Context) {
976  TTable* Table = new TTable();
977  Table->LoadTableShM(ShMIn, Context);
978  return PTable(Table);
979  }
981 
983  void Save(TSOut& SOut);
985  void Dump(FILE *OutF=stdout) const;
986 
988  static PTable TableFromHashMap(const THash<TInt,TInt>& H, const TStr& Col1, const TStr& Col2,
989  TTableContext* Context, const TBool IsStrKeys = false) {
990  PTable T = New(H, Col1, Col2, Context, IsStrKeys);
991  T->InitIds();
992  return T;
993  }
995  static PTable TableFromHashMap(const THash<TInt,TFlt>& H, const TStr& Col1, const TStr& Col2,
996  TTableContext* Context, const TBool IsStrKeys = false) {
997  PTable T = New(H, Col1, Col2, Context, IsStrKeys);
998  T->InitIds();
999  return T;
1000  }
1002  void AddRow(const TTableRow& Row) { AddRowV(Row.GetIntVals(), Row.GetFltVals(), Row.GetStrVals()); };
1003 
1006  return Context;
1007  }
1010 
1011 /***** Value Getters - getValue(column name, physical row Idx) *****/
1013  TInt GetColIdx(const TStr& ColName) const {
1014  TStr NColName = NormalizeColName(ColName);
1015  return ColTypeMap.IsKey(NColName) ? ColTypeMap.GetDat(NColName).Val2 : TInt(-1);
1016  }
1017 
1018  // No type checking. Assuming ColName actually refers to the right type.
1020  TInt GetIntVal(const TStr& ColName, const TInt& RowIdx) {
1021  return IntCols[GetColIdx(ColName)][RowIdx];
1022  }
1024  TFlt GetFltVal(const TStr& ColName, const TInt& RowIdx) {
1025  return FltCols[GetColIdx(ColName)][RowIdx];
1026  }
1028  TStr GetStrVal(const TStr& ColName, const TInt& RowIdx) const {
1029  return GetStrValIdx(GetColIdx(ColName), RowIdx);
1030  }
1031 
1033  TInt GetStrMapById(TInt ColIdx, TInt RowIdx) const {
1034  return StrColMaps[ColIdx][RowIdx];
1035  }
1036 
1038  TInt GetStrMapByName(const TStr& ColName, TInt RowIdx) const {
1039  return StrColMaps[GetColIdx(ColName)][RowIdx];
1040  }
1041 
1043  TStr GetStrValById(TInt ColIdx, TInt RowIdx) const {
1044  return GetStrValIdx(ColIdx, RowIdx);
1045  }
1046 
1048  TStr GetStrValByName(const TStr& ColName, const TInt& RowIdx) const {
1049  return GetStrVal(ColName, RowIdx);
1050  }
1051 
1053 
1059  TIntV GetIntRowIdxByVal(const TStr& ColName, const TInt& Val) const;
1061 
1067  TIntV GetStrRowIdxByMap(const TStr& ColName, const TInt& Map) const;
1069 
1075  TIntV GetFltRowIdxByVal(const TStr& ColName, const TFlt& Val) const;
1076 
1078 
/// Creates Index for Int Column ColName.
// NOTE(review): meaning of the TInt return value is defined in table.cpp -- confirm before relying on it.
1086  TInt RequestIndexInt(const TStr& ColName);
1088 
// NOTE(review): presumably the Flt-column counterpart of RequestIndexInt -- confirm.
1096  TInt RequestIndexFlt(const TStr& ColName);
1098 
/// Creates Index for Str Column ColName.
1106  TInt RequestIndexStrMap(const TStr& ColName);
1107 
/// Returns a string with KeyId.
/// The string is the key stored under KeyId in the context's StringVals container.
1109  TStr GetStr(const TInt& KeyId) const {
1110  return Context->StringVals.GetKey(KeyId);
1111  }
1112 
1113 /***** Value Getters - getValue(col idx, row Idx) *****/
1114  // No type and bound checking
1116  TInt GetIntValAtRowIdx(const TInt& ColIdx, const TInt& RowIdx) {
1117  return IntCols[ColIdx][RowIdx];
1118  }
1120  TFlt GetFltValAtRowIdx(const TInt& ColIdx, const TInt& RowIdx) {
1121  return FltCols[ColIdx][RowIdx];
1122  }
1123 
1126 
1127 /***** Graph handling *****/
/// Returns a vector of networks built from this table, split on SplitAttr.
/// StartVal and EndVal default to the full TInt range (TInt::Mn .. TInt::Mx).
// NOTE(review): presumably WindowSize/JumpSize define a sliding window over SplitAttr values -- confirm.
1129  TVec<PNEANet> ToGraphSequence(TStr SplitAttr, TAttrAggr AggrPolicy,
1130  TInt WindowSize, TInt JumpSize, TInt StartVal = TInt::Mn, TInt EndVal = TInt::Mx);
/// Variant taking explicit SplitIntervals (a vector of int pairs) instead of window parameters.
1132  TVec<PNEANet> ToVarGraphSequence(TStr SplitAttr, TAttrAggr AggrPolicy, TIntPrV SplitIntervals);
// NOTE(review): presumably yields one network per distinct GroupAttr value -- confirm.
1134  TVec<PNEANet> ToGraphPerGroup(TStr GroupAttr, TAttrAggr AggrPolicy);
1135 
1137 
/// Iterator-style counterpart of ToGraphSequence: returns a single network per call
/// rather than a vector of networks.
// NOTE(review): presumably this returns the first graph and later graphs are fetched by a
// separate "next" call until IsLastGraphOfSequence() -- confirm against table.cpp.
1140  PNEANet ToGraphSequenceIterator(TStr SplitAttr, TAttrAggr AggrPolicy,
1141  TInt WindowSize, TInt JumpSize, TInt StartVal = TInt::Mn, TInt EndVal = TInt::Mx);
1143 
/// Creates the graph sequence one at a time.
1146  PNEANet ToVarGraphSequenceIterator(TStr SplitAttr, TAttrAggr AggrPolicy, TIntPrV SplitIntervals);
1148 
/// Iterator-style counterpart of ToGraphPerGroup: returns a single network per call.
1151  PNEANet ToGraphPerGroupIterator(TStr GroupAttr, TAttrAggr AggrPolicy);
1156 
/// Gets the name of the column to be used as src nodes in the graph.
1158  TStr GetSrcCol() const { return SrcCol; }
/// Sets the name of the column to be used as src nodes in the graph.
/// Calls TExcept::Throw with "<Src>: no such column" when IsColName(Src) is false;
/// the name is stored in normalized form (NormalizeColName).
1160  void SetSrcCol(const TStr& Src) {
1161  if (!IsColName(Src)) { TExcept::Throw(Src + ": no such column"); }
1162  SrcCol = NormalizeColName(Src);
1163  }
/// Gets the name of the column to be used as dst nodes in the graph.
1165  TStr GetDstCol() const { return DstCol; }
/// Sets the name of the column to be used as dst nodes in the graph.
/// Calls TExcept::Throw with "<Dst>: no such column" when IsColName(Dst) is false;
/// the name is stored in normalized form (NormalizeColName).
1167  void SetDstCol(const TStr& Dst) {
1168  if (!IsColName(Dst)) { TExcept::Throw(Dst + ": no such column"); }
1169  DstCol = NormalizeColName(Dst);
1170  }
// Graph-attribute registration helpers. Each forwards to AddGraphAttribute /
// AddGraphAttributeV with exactly one of the three flags set; for the vector form
// the flags are (IsEdge, IsSrc, IsDst).
// NOTE(review): the single-name AddGraphAttribute is assumed to take the flags in the same order -- confirm.
/// Registers column Attr as an edge attribute.
1172  void AddEdgeAttr(const TStr& Attr) { AddGraphAttribute(Attr, true, false, false); }
/// Registers every column in Attrs as an edge attribute.
1174  void AddEdgeAttr(TStrV& Attrs) { AddGraphAttributeV(Attrs, true, false, false); }
/// Registers column Attr as a src node attribute.
1176  void AddSrcNodeAttr(const TStr& Attr) { AddGraphAttribute(Attr, false, true, false); }
/// Registers every column in Attrs as a src node attribute.
1178  void AddSrcNodeAttr(TStrV& Attrs) { AddGraphAttributeV(Attrs, false, true, false); }
/// Registers column Attr as a dst node attribute.
1180  void AddDstNodeAttr(const TStr& Attr) { AddGraphAttribute(Attr, false, false, true); }
/// Registers every column in Attrs as a dst node attribute.
1182  void AddDstNodeAttr(TStrV& Attrs) { AddGraphAttributeV(Attrs, false, false, true); }
/// Handles the common case where src and dst both belong to the same "universe" of entities:
/// registers Attr as both a src and a dst node attribute.
1184  void AddNodeAttr(const TStr& Attr) { AddSrcNodeAttr(Attr); AddDstNodeAttr(Attr); }
/// Vector form of AddNodeAttr: registers every column in Attrs for both src and dst nodes.
1186  void AddNodeAttr(TStrV& Attrs) { AddSrcNodeAttr(Attrs); AddDstNodeAttr(Attrs); }
/// Appends the triple (SrcAttr, DstAttr, CommonAttrName) to CommonNodeAttrs.
/// All three names are normalized with NormalizeColName before being stored;
/// none of them is checked against the schema here.
// NOTE(review): presumably the src and dst columns are exposed under the single node attribute CommonAttrName -- confirm.
1188  void SetCommonNodeAttrs(const TStr& SrcAttr, const TStr& DstAttr, const TStr& CommonAttrName){
1189  CommonNodeAttrs.Add(TStrTr(NormalizeColName(SrcAttr), NormalizeColName(DstAttr), NormalizeColName(CommonAttrName)));
1190  }
// Getters for the registered graph attribute names, one per (role, type) pair.
/// Gets src node int attribute name vector.
1192  TStrV GetSrcNodeIntAttrV() const;
/// Gets dst node int attribute name vector.
1194  TStrV GetDstNodeIntAttrV() const;
/// Gets edge int attribute name vector.
1196  TStrV GetEdgeIntAttrV() const;
/// Gets src node flt attribute name vector.
1198  TStrV GetSrcNodeFltAttrV() const;
/// Gets dst node flt attribute name vector.
1200  TStrV GetDstNodeFltAttrV() const;
/// Gets edge flt attribute name vector.
1202  TStrV GetEdgeFltAttrV() const;
/// Gets src node str attribute name vector.
1204  TStrV GetSrcNodeStrAttrV() const;
/// Gets dst node str attribute name vector.
1206  TStrV GetDstNodeStrAttrV() const;
/// Gets edge str attribute name vector.
1208  TStrV GetEdgeStrAttrV() const;
1209 
// Static factories that build a table from an existing network, using Context for the new table.
// NOTE(review): presumably one row per node, with the node attributes as columns -- confirm.
1211  static PTable GetNodeTable(const PNEANet& Network, TTableContext* Context);
// NOTE(review): presumably one row per edge, with the edge attributes as columns -- confirm.
1213  static PTable GetEdgeTable(const PNEANet& Network, TTableContext* Context);
1214 
1215 #ifdef USE_OPENMP
// Only declared when USE_OPENMP is defined; takes a PNGraphMP rather than a PNEANet.
1216  static PTable GetEdgeTablePN(const PNGraphMP& Network, TTableContext* Context);
1218 #endif // USE_OPENMP
1219 
// NOTE(review): presumably builds a two-column table (node attribute, flt property) from the
// int->flt hash Property, naming the columns NodeAttrName and PropertyAttrName -- confirm.
1221  static PTable GetFltNodePropertyTable(const PNEANet& Network, const TIntFltH& Property,
1222  const TStr& NodeAttrName, const TAttrType& NodeAttrType, const TStr& PropertyAttrName,
1223  TTableContext* Context);
1224 
1225 /***** Basic Getters *****/
1227  TAttrType GetColType(const TStr& ColName) const {
1228  TStr NColName = NormalizeColName(ColName);
1229  return ColTypeMap.GetDat(NColName).Val1;
1230  }
/// Gets total number of rows in this table.
1232  TInt GetNumRows() const { return NumRows;}
/// Gets the value of NumValidRows, the count of valid rows tracked by the table.
1234  TInt GetNumValidRows() const { return NumValidRows;}
1235 
1238 
1239 /***** Iterators *****/
/// Gets iterator positioned at FirstValidRow (physical index of first valid row).
1241  TRowIterator BegRI() const { return TRowIterator(FirstValidRow, this);}
/// Gets the end iterator, positioned at the special row value TTable::Last.
1243  TRowIterator EndRI() const { return TRowIterator(TTable::Last, this);}
/// Gets a remove-capable iterator positioned at FirstValidRow; non-const because it is built from a mutable this.
1245  TRowIteratorWithRemove BegRIWR(){ return TRowIteratorWithRemove(FirstValidRow, this);}
/// Partitions the table into NumPartitions and populate Partitions with the ranges.
1249  void GetPartitionRanges(TIntPrV& Partitions, TInt NumPartitions) const;
1250 
1251 /***** Table Operations *****/
// NOTE(review): presumably renames column Column to NewLabel in the schema -- confirm against table.cpp.
1253  void Rename(const TStr& Column, const TStr& NewLabel);
1254 
// NOTE(review): presumably removes rows with duplicate values in column Col -- confirm.
1256  void Unique(const TStr& Col);
// Multi-column form of Unique; Ordered defaults to true.
// NOTE(review): Ordered presumably controls whether column order matters when comparing rows -- confirm.
1258  void Unique(const TStrV& Cols, TBool Ordered = true);
1259 
1261 
/// Evaluates Predicate over the table and reports matching rows through SelectedRows.
// NOTE(review): with Remove == true (the default) non-matching rows are presumably removed
// from the table -- confirm against table.cpp.
1265  void Select(TPredicate& Predicate, TIntV& SelectedRows, TBool Remove = true);
/// Convenience overload: calls Select with a throw-away SelectedRows vector and Remove = true.
1266  void Select(TPredicate& Predicate) {
1267  TIntV SelectedRows;
1268  Select(Predicate, SelectedRows, true);
1269  }
/// Labelling counterpart of Select; PositiveLabel defaults to 1 and NegativeLabel to 0.
// NOTE(review): presumably writes the label into column LabelName instead of removing rows -- confirm.
1270  void Classify(TPredicate& Predicate, const TStr& LabelName, const TInt& PositiveLabel = 1,
1271  const TInt& NegativeLabel = 0);
1272 
1274 
/// Selection on a single comparison Cmp between columns Col1 and Col2; matching rows are
/// reported through SelectedRows.
// NOTE(review): Remove presumably has the same meaning as in Select -- confirm.
1276  void SelectAtomic(const TStr& Col1, const TStr& Col2, TPredComp Cmp,
1277  TIntV& SelectedRows, TBool Remove = true);
/// Convenience overload: calls SelectAtomic with a throw-away SelectedRows vector and Remove = true.
1278  void SelectAtomic(const TStr& Col1, const TStr& Col2, TPredComp Cmp) {
1279  TIntV SelectedRows;
1280  SelectAtomic(Col1, Col2, Cmp, SelectedRows, true);
1281  }
/// Labelling counterpart of SelectAtomic; PositiveLabel defaults to 1 and NegativeLabel to 0.
1282  void ClassifyAtomic(const TStr& Col1, const TStr& Col2, TPredComp Cmp,
1283  const TStr& LabelName, const TInt& PositiveLabel = 1, const TInt& NegativeLabel = 0);
1284 
/// Selection on a single comparison Cmp between column Col and the constant Val.
/// Results are reported through SelectedRows and SelectedTable.
// NOTE(review): Remove presumably requests in-place removal and Table presumably requests that
// SelectedTable be filled -- confirm against table.cpp.
1286  void SelectAtomicConst(const TStr& Col, const TPrimitive& Val, TPredComp Cmp,
1287  TIntV& SelectedRows, PTable& SelectedTable, TBool Remove = true, TBool Table = true);
1288 
/// In-place form: wraps Val in a TPrimitive and calls the core overload with Remove = true,
/// Table = false; both output arguments are local and discarded.
1289  template <class T>
1290  void SelectAtomicConst(const TStr& Col, const T& Val, TPredComp Cmp) {
1291  TIntV SelectedRows;
1292  PTable SelectedTable;
1293  SelectAtomicConst(Col, TPrimitive(Val), Cmp, SelectedRows, SelectedTable, true, false);
1294  }
/// Output-table form: wraps Val in a TPrimitive and calls the core overload with Remove = false,
/// Table = true, passing the caller's SelectedTable; the row-id vector is discarded.
1295  template <class T>
1296  void SelectAtomicConst(const TStr& Col, const T& Val, TPredComp Cmp, PTable& SelectedTable) {
1297  TIntV SelectedRows;
1298  SelectAtomicConst(Col, TPrimitive(Val), Cmp, SelectedRows, SelectedTable, false, true);
1299  }
1300  template <class T>
1301  void ClassifyAtomicConst(const TStr& Col, const T& Val, TPredComp Cmp,
1302  const TStr& LabelName, const TInt& PositiveLabel = 1, const TInt& NegativeLabel = 0) {
1303  TIntV SelectedRows;
1304  PTable SelectedTable;
1305  SelectAtomicConst(Col, TPrimitive(Val), Cmp, SelectedRows, SelectedTable, false, false);
1306  ClassifyAux(SelectedRows, LabelName, PositiveLabel, NegativeLabel);
1307  }
1308 
// Non-template, type-specific entry points. Each forwards unchanged to the matching
// SelectAtomicConst template overload.
/// In-place selection against an int constant.
1309  void SelectAtomicIntConst(const TStr& Col, const TInt& Val, TPredComp Cmp) {
1310  SelectAtomicConst(Col, Val, Cmp);
1311  }
/// Selection against an int constant with the result delivered through SelectedTable.
1312  void SelectAtomicIntConst(const TStr& Col, const TInt& Val, TPredComp Cmp, PTable& SelectedTable) {
1313  SelectAtomicConst(Col, Val, Cmp, SelectedTable);
1314  }
1315 
/// In-place selection against a string constant.
1316  void SelectAtomicStrConst(const TStr& Col, const TStr& Val, TPredComp Cmp) {
1317  SelectAtomicConst(Col, Val, Cmp);
1318  }
/// Selection against a string constant with the result delivered through SelectedTable.
1319  void SelectAtomicStrConst(const TStr& Col, const TStr& Val, TPredComp Cmp, PTable& SelectedTable) {
1320  SelectAtomicConst(Col, Val, Cmp, SelectedTable);
1321  }
1322 
/// In-place selection against a flt constant.
1323  void SelectAtomicFltConst(const TStr& Col, const TFlt& Val, TPredComp Cmp) {
1324  SelectAtomicConst(Col, Val, Cmp);
1325  }
/// Selection against a flt constant with the result delivered through SelectedTable.
1326  void SelectAtomicFltConst(const TStr& Col, const TFlt& Val, TPredComp Cmp, PTable& SelectedTable) {
1327  SelectAtomicConst(Col, Val, Cmp, SelectedTable);
1328  }
1329 
1331 
/// Groups rows depending on values of GroupBy columns.
// NOTE(review): presumably the group id of each row is written to column GroupColName -- confirm.
1334  void Group(const TStrV& GroupBy, const TStr& GroupColName, TBool Ordered = true, TBool UsePhysicalIds = true);
1335 
1337 
// NOTE(review): presumably stores, for each row, the number of rows sharing its value of Col
// in a new column CountColName -- confirm against table.cpp.
1340  void Count(const TStr& CountColName, const TStr& Col);
1341 
/// Orders the rows according to the values in columns of OrderBy; OrderColName defaults to "",
/// ResetRankByMSC to false and Asc to true.
1343  void Order(const TStrV& OrderBy, TStr OrderColName = "", TBool ResetRankByMSC = false, TBool Asc = true);
1344 
// NOTE(review): presumably groups by GroupByAttrs, applies AggOp to ValAttr within each group and
// stores the result in column ResAttr -- confirm.
1346  void Aggregate(const TStrV& GroupByAttrs, TAttrAggr AggOp, const TStr& ValAttr,
1347  const TStr& ResAttr, TBool Ordered = true);
1348 
// NOTE(review): presumably applies AggOp across the columns AggrAttrs of each row and stores the
// result in column ResAttr -- confirm.
1350  void AggregateCols(const TStrV& AggrAttrs, TAttrAggr AggOp, const TStr& ResAttr);
1351 
/// Returns a vector of tables derived from this table using the GroupByAttrs columns.
// NOTE(review): presumably one table per group -- confirm.
1353  TVec<PTable> SpliceByGroup(const TStrV& GroupByAttrs, TBool Ordered = true);
1354 
1356 
/// Joins this table with Table, pairing this->Col1 with Table.Col2, and returns the result as a new table.
// NOTE(review): presumably an equi-join on the two columns -- confirm against table.cpp.
1359  PTable Join(const TStr& Col1, const TTable& Table, const TStr& Col2);
/// Smart-pointer convenience overload: dereferences Table and forwards to the overload above.
1360  PTable Join(const TStr& Col1, const PTable& Table, const TStr& Col2) {
1361  return Join(Col1, *Table, Col2);
1362  }
// NOTE(review): presumably joins on the key columns and keeps pairs whose number of matching
// JoinCol values reaches Threshold; PerJoinKey defaults to false -- confirm against table.cpp.
1363  PTable ThresholdJoin(const TStr& KeyCol1, const TStr& JoinCol1, const TTable& Table, const TStr& KeyCol2, const TStr& JoinCol2, TInt Threshold, TBool PerJoinKey = false);
1364 
/// Joins the table with itself on column Col (Join(Col, *this, Col)).
1366  PTable SelfJoin(const TStr& Col) { return Join(Col, *this, Col); }
/// Similarity-joins the table with itself: forwards to SimJoin with *this and Cols on both sides.
1367  PTable SelfSimJoin(const TStrV& Cols, const TStr& DistanceColName, const TSimType& SimType, const TFlt& Threshold) { return SimJoin(Cols, *this, Cols, DistanceColName, SimType, Threshold); }
1369 
/// Performs join if the distance between two rows is less than the specified threshold.
/// This overload takes a single grouping attribute.
1371  PTable SelfSimJoinPerGroup(const TStr& GroupAttr, const TStr& SimCol, const TStr& DistanceColName, const TSimType& SimType, const TFlt& Threshold);
1372 
/// Overload of SelfSimJoinPerGroup taking a vector of group-by columns.
1374  PTable SelfSimJoinPerGroup(const TStrV& GroupBy, const TStr& SimCol, const TStr& DistanceColName, const TSimType& SimType, const TFlt& Threshold);
1375 
/// Similarity join of this table (columns Cols1) with Table (columns Cols2) under metric SimType.
// NOTE(review): presumably the computed distance is stored in column DistanceColName and only
// pairs within Threshold are kept -- confirm.
1377  PTable SimJoin(const TStrV& Cols1, const TTable& Table, const TStrV& Cols2, const TStr& DistanceColName, const TSimType& SimType, const TFlt& Threshold);
/// Selects first N rows from the table.
1379  void SelectFirstNRows(const TInt& N);
1380 
1381  // Computes distances between elements in this->Col1 and Table->Col2 according
1382  // to given metric. Store the distances in DistCol, but keep only rows where
1383  // distance <= threshold
1384  // void Dist(const TStr& Col1, const TTable& Table, const TStr Col2, const TStr& DistColName,
1385  // const TMetric& Metric, TFlt threshold);
1386 
1388 
/// Releases memory of deleted rows, and defrags.
1391  void Defrag();
1392 
// NOTE(review): the three Store*Col functions presumably add a column named ColName holding
// ColVals (one value per row) -- confirm against table.cpp.
1394  void StoreIntCol(const TStr& ColName, const TIntV& ColVals);
1396  void StoreFltCol(const TStr& ColName, const TFltV& ColVals);
1398  void StoreStrCol(const TStr& ColName, const TStrV& ColVals);
1399 
1400  // Assumption: KeyAttr is a primary key in this table, and FKeyAttr is a primary key in
1401  // the argument table. Equivalent to SQL's: UPDATE this SET UpdateAttr = ReadAttr WHERE KeyAttr = FKeyAttr
/// Updates flt column UpdateAttr of this table from column ReadAttr of Table, matching rows on
/// KeyAttr (this table) and FKeyAttr (Table). DefaultFltVal defaults to 0.0.
// NOTE(review): DefaultFltVal is presumably written to rows without a match -- confirm.
1402  void UpdateFltFromTable(const TStr& KeyAttr, const TStr& UpdateAttr, const TTable& Table,
1403  const TStr& FKeyAttr, const TStr& ReadAttr, TFlt DefaultFltVal = 0.0);
1404 #ifdef GCC_ATOMIC
/// Variant of UpdateFltFromTable with the same parameters, declared only when GCC_ATOMIC is defined.
1405  void UpdateFltFromTableMP(const TStr& KeyAttr, const TStr& UpdateAttr, const TTable& Table,
1406  const TStr& FKeyAttr, const TStr& ReadAttr, TFlt DefaultFltVal = 0.0);
1407  // TODO: this should be a generic vector operation (parallel equivalent to TVec::PutAll)
// NOTE(review): presumably sets every entry of flt column index UpdateColIdx to DefaultFltVal -- confirm.
1408  void SetFltColToConstMP(TInt UpdateColIdx, TFlt DefaultFltVal);
1409 #endif // GCC_ATOMIC
1410 
// Set-style operations. Each PTable overload dereferences the smart pointer and forwards to
// the TTable overload declared just above it.
// NOTE(review): Union presumably drops duplicate rows, in contrast to UnionAll -- confirm.
1412  PTable Union(const TTable& Table);
1413  PTable Union(const PTable& Table) { return Union(*Table); };
/// Returns union of this table with given Table, preserving duplicates.
1415  PTable UnionAll(const TTable& Table);
1416  PTable UnionAll(const PTable& Table) { return UnionAll(*Table); };
/// In-place form of UnionAll: returns nothing instead of a new table.
1418  void UnionAllInPlace(const TTable& Table);
1419  void UnionAllInPlace(const PTable& Table) { return UnionAllInPlace(*Table); };
// NOTE(review): presumably returns the rows present in both tables -- confirm.
1421  PTable Intersection(const TTable& Table);
1422  PTable Intersection(const PTable& Table) { return Intersection(*Table); };
// Takes Table by non-const reference, unlike the other set operations.
// NOTE(review): presumably returns the rows of this table that are not in Table -- confirm.
1424  PTable Minus(TTable& Table);
1425  PTable Minus(const PTable& Table) { return Minus(*Table); };
// NOTE(review): Project presumably returns a new table restricted to ProjectCols, while
// ProjectInPlace drops the other columns from this table -- confirm.
1427  PTable Project(const TStrV& ProjectCols);
1429  void ProjectInPlace(const TStrV& ProjectCols);
1430 
1431  /* Column-wise arithmetic operations */
1432 
1434 
/// Generic columnwise arithmetic between two columns of this table: applies op to Attr1 and
/// Attr2, with ResAttr naming the result column.
1437  void ColGenericOp(const TStr& Attr1, const TStr& Attr2, const TStr& ResAttr, TArithOp op);
1438 #ifdef USE_OPENMP
// Index-based variant, declared only when USE_OPENMP is defined.
1439  void ColGenericOpMP(TInt ArgColIdx1, TInt ArgColIdx2, TAttrType ArgType1, TAttrType ArgType2, TInt ResColIdx, TArithOp op);
1440 #endif // USE_OPENMP
/// Performs columnwise addition. See TTable::ColGenericOp.
1441  void ColAdd(const TStr& Attr1, const TStr& Attr2, const TStr& ResultAttrName="");
// The remaining per-operator wrappers share ColAdd's signature; ColDiv is documented as
// columnwise division, and the others are presumably the analogous sub/mul/mod/min/max
// operations -- confirm.
1444  void ColSub(const TStr& Attr1, const TStr& Attr2, const TStr& ResultAttrName="");
1446  void ColMul(const TStr& Attr1, const TStr& Attr2, const TStr& ResultAttrName="");
/// Performs columnwise division. See TTable::ColGenericOp.
1448  void ColDiv(const TStr& Attr1, const TStr& Attr2, const TStr& ResultAttrName="");
1450  void ColMod(const TStr& Attr1, const TStr& Attr2, const TStr& ResultAttrName="");
1452  void ColMin(const TStr& Attr1, const TStr& Attr2, const TStr& ResultAttrName="");
1454  void ColMax(const TStr& Attr1, const TStr& Attr2, const TStr& ResultAttrName="");
1455 
/// Generic columnwise arithmetic between column Attr1 of this table and column Attr2 of Table.
// NOTE(review): AddToFirstTable presumably selects whether the result column ResAttr is added
// to this table or to Table -- confirm against table.cpp.
1457  void ColGenericOp(const TStr& Attr1, TTable& Table, const TStr& Attr2, const TStr& ResAttr,
1458  TArithOp op, TBool AddToFirstTable);
1459  // void ColGenericOpMP(TTable& Table, TBool AddToFirstTable, TInt ArgColIdx1, TInt ArgColIdx2,
1460  // TAttrType ArgType1, TAttrType ArgType2, TInt ResColIdx, TArithOp op);
// Per-operator two-table wrappers; ResAttr defaults to "" and AddToFirstTable to true.
1462  void ColAdd(const TStr& Attr1, TTable& Table, const TStr& Attr2, const TStr& ResAttr="",
1463  TBool AddToFirstTable=true);
1465  void ColSub(const TStr& Attr1, TTable& Table, const TStr& Attr2, const TStr& ResAttr="",
1466  TBool AddToFirstTable=true);
1468  void ColMul(const TStr& Attr1, TTable& Table, const TStr& Attr2, const TStr& ResAttr="",
1469  TBool AddToFirstTable=true);
1471  void ColDiv(const TStr& Attr1, TTable& Table, const TStr& Attr2, const TStr& ResAttr="",
1472  TBool AddToFirstTable=true);
1474  void ColMod(const TStr& Attr1, TTable& Table, const TStr& Attr2, const TStr& ResAttr="",
1475  TBool AddToFirstTable=true);
1476 
/// Generic columnwise arithmetic between column Attr1 and the scalar Num.
// NOTE(review): floatCast presumably requests that an int column be treated as flt for the
// operation -- confirm against table.cpp.
1478  void ColGenericOp(const TStr& Attr1, const TFlt& Num, const TStr& ResAttr, TArithOp op, const TBool floatCast);
1479 #ifdef USE_OPENMP
// Index-based variant, declared only when USE_OPENMP is defined.
1480  void ColGenericOpMP(const TInt& ColIdx1, const TInt& ColIdx2, TAttrType ArgType, const TFlt& Num, TArithOp op, TBool ShouldCast);
1481 #endif // USE_OPENMP
// Per-operator scalar wrappers; ResultAttrName defaults to "" and floatCast to false.
1482  void ColAdd(const TStr& Attr1, const TFlt& Num, const TStr& ResultAttrName="", const TBool floatCast=false);
1485  void ColSub(const TStr& Attr1, const TFlt& Num, const TStr& ResultAttrName="", const TBool floatCast=false);
1487  void ColMul(const TStr& Attr1, const TFlt& Num, const TStr& ResultAttrName="", const TBool floatCast=false);
1489  void ColDiv(const TStr& Attr1, const TFlt& Num, const TStr& ResultAttrName="", const TBool floatCast=false);
1491  void ColMod(const TStr& Attr1, const TFlt& Num, const TStr& ResultAttrName="", const TBool floatCast=false);
1492 
1493  /* Column-wise string operations */
1494 
/// Concatenates two string columns. Sep and ResAttr both default to "".
// NOTE(review): Sep is presumably inserted between the two values -- confirm.
1496  void ColConcat(const TStr& Attr1, const TStr& Attr2, const TStr& Sep = "", const TStr& ResAttr="");
/// Two-table form: concatenates column Attr1 of this table with column Attr2 of Table.
/// AddToFirstTable defaults to true.
1498  void ColConcat(const TStr& Attr1, TTable& Table, const TStr& Attr2, const TStr& Sep = "", const TStr& ResAttr="",
1499  TBool AddToFirstTable=true);
/// Constant form: concatenates column Attr1 with the fixed string Val.
1501  void ColConcatConst(const TStr& Attr1, const TStr& Val, const TStr& Sep = "", const TStr& ResAttr="");
1502 
// NOTE(review): the three Read*Col functions presumably copy the values of column ColName
// into Result -- confirm against table.cpp.
1504  void ReadIntCol(const TStr& ColName, TIntV& Result) const;
1506  void ReadFltCol(const TStr& ColName, TFltV& Result) const;
1508  void ReadStrCol(const TStr& ColName, TStrV& Result) const;
1509 
// Called by the TableFromHashMap factories on each freshly built table.
// NOTE(review): presumably (re)creates the row id column -- confirm.
1511  void InitIds();
1512 
1514 
// NOTE(review): presumably returns pairs of rows where the second is among the next K
// successors of the first under OrderCol within the same GroupBy group -- confirm.
1516  PTable IsNextK(const TStr& OrderCol, TInt K, const TStr& GroupBy, const TStr& RankColName = "");
1517 
// Diagnostic helpers.
// NOTE(review): presumably these print size information for the table and its context -- confirm.
1518  void PrintSize();
1519  void PrintContextSize();
/// Returns approximate memory used by table in [KB].
1521  TSize GetMemUsedKB();
1524 
1525  friend class TPt<TTable>;
1526  friend class TRowIterator;
1528 };
1529 
1531 
// NOTE(review): the signature line of this template is absent from this listing; the body
// below reads the names Graph, NodeVals and FNodeVal.
// Looks FNodeVal up in NodeVals. When it is not yet a key, the next id (the current number of
// entries in NodeVals) is allocated, a node with that id is added to Graph, the mapping
// FNodeVal -> id is recorded, and the new id is returned. Otherwise the stored id is returned.
1532 template<class T>
1534  if (!NodeVals.IsKey(FNodeVal)) {
1535  TInt NodeVal = NodeVals.Len();
1536  Graph->AddNode(NodeVal);
1537  NodeVals.AddKey(FNodeVal);
1538  NodeVals.AddDat(FNodeVal, NodeVal);
1539  return NodeVal;
1540  } else { return NodeVals.GetDat(FNodeVal); }
1541 }
1542 
// NOTE(review): the signature line of this template is absent from this listing; the body
// below reads the names V (a vector of T) and Policy.
// Reduces V to a single value of type T according to Policy:
//   aaMin / aaMax  - smallest / largest element (via operator< / operator>)
//   aaFirst/aaLast - first / last element
//   aaSum          - sum of all elements
//   aaMean         - sum divided by V.Len(), using T's own operator/
//   aaMedian       - sorts V IN PLACE, then returns the element at index V.Len()/2
//                    (the upper of the two middle elements when the length is even)
//   aaCount        - returns V[0]; per the comment below this case is not expected to be reached
// Every case reads V[0] or V[V.Len()-1] without a length check, so V must be non-empty.
1543 template <class T>
1545  switch (Policy) {
1546  case aaMin: {
1547  T Res = V[0];
1548  for (TInt i = 1; i < V.Len(); i++) {
1549  if (V[i] < Res) { Res = V[i]; }
1550  }
1551  return Res;
1552  }
1553  case aaMax: {
1554  T Res = V[0];
1555  for (TInt i = 1; i < V.Len(); i++) {
1556  if (V[i] > Res) { Res = V[i]; }
1557  }
1558  return Res;
1559  }
1560  case aaFirst: {
1561  return V[0];
1562  }
1563  case aaLast:{
1564  return V[V.Len()-1];
1565  }
1566  case aaSum: {
1567  T Res = V[0];
1568  for (TInt i = 1; i < V.Len(); i++) {
1569  Res = Res + V[i];
1570  }
1571  return Res;
1572  }
1573  case aaMean: {
1574  T Res = V[0];
1575  for (TInt i = 1; i < V.Len(); i++) {
1576  Res = Res + V[i];
1577  }
1578  Res = Res / V.Len();
1579  return Res;
1580  }
1581  case aaMedian: {
// Sorting mutates the caller's vector.
1582  V.Sort();
1583  return V[V.Len()/2];
1584  }
1585  case aaCount: {
1586  // NOTE: Code should never reach here
1587  // I had to put this here to avoid a compiler warning.
1588  // Is there a better way to do this?
1589  return V[0];
1590  }
1591  }
1592  // Added to remove a compiler warning.
// Reached only when Policy matches none of the cases above; returns a default-constructed T.
1593  T ShouldNotComeHere;
1594  return ShouldNotComeHere;
1595 }
1596 
1597 template <class T>
1598 void TTable::GroupByIntCol(const TStr& GroupBy, T& Grouping,
1599  const TIntV& IndexSet, TBool All, TBool UsePhysicalIds) const {
1600  TInt IdColIdx = GetColIdx(IdColName);
1601  if(!UsePhysicalIds && IdColIdx < 0){
1602  TExcept::Throw("Grouping: Either use physical row ids, or have an id column");
1603  }
1604  // TO do: add a check if grouping already exists and is valid
1605  GroupingSanityCheck(GroupBy, atInt);
1606  if (All) {
1607  // Optimize for the common and most expensive case - iterate over only valid rows.
1608  for (TRowIterator it = BegRI(); it < EndRI(); it++) {
1609  TInt idx = UsePhysicalIds ? it.GetRowIdx() : it.GetIntAttr(IdColIdx);
1610  UpdateGrouping<TInt>(Grouping, it.GetIntAttr(GroupBy), idx);
1611  }
1612  } else {
1613  // Consider only rows in IndexSet.
1614  for (TInt i = 0; i < IndexSet.Len(); i++) {
1615  if (IsRowValid(IndexSet[i])) {
1616  TInt RowIdx = IndexSet[i];
1617  const TIntV& Col = IntCols[GetColIdx(GroupBy)];
1618  TInt idx = UsePhysicalIds ? RowIdx : IntCols[IdColIdx][RowIdx];
1619  UpdateGrouping<TInt>(Grouping, Col[RowIdx], idx);
1620  }
1621  }
1622  }
1623 }
1624 
1625 template <class T>
1626 void TTable::GroupByFltCol(const TStr& GroupBy, T& Grouping,
1627  const TIntV& IndexSet, TBool All, TBool UsePhysicalIds) const {
1628  TInt IdColIdx = GetColIdx(IdColName);
1629  if(!UsePhysicalIds && IdColIdx < 0){
1630  TExcept::Throw("Grouping: Either use physical row ids, or have an id column");
1631  }
1632  GroupingSanityCheck(GroupBy, atFlt);
1633  if (All) {
1634  // Optimize for the common and most expensive case - iterate over only valid rows.
1635  for (TRowIterator it = BegRI(); it < EndRI(); it++) {
1636  TInt idx = UsePhysicalIds ? it.GetRowIdx() : it.GetIntAttr(IdColIdx);
1637  UpdateGrouping<TFlt>(Grouping, it.GetFltAttr(GroupBy), idx);
1638  }
1639  } else {
1640  // Consider only rows in IndexSet.
1641  for (TInt i = 0; i < IndexSet.Len(); i++) {
1642  if (IsRowValid(IndexSet[i])) {
1643  TInt RowIdx = IndexSet[i];
1644  const TFltV& Col = FltCols[GetColIdx(GroupBy)];
1645  TInt idx = UsePhysicalIds ? RowIdx : IntCols[IdColIdx][RowIdx];
1646  UpdateGrouping<TFlt>(Grouping, Col[RowIdx], idx);
1647  }
1648  }
1649  }
1650 }
1651 
1652 template <class T>
1653 void TTable::GroupByStrCol(const TStr& GroupBy, T& Grouping,
1654  const TIntV& IndexSet, TBool All, TBool UsePhysicalIds) const {
1655  TInt IdColIdx = GetColIdx(IdColName);
1656  if(!UsePhysicalIds && IdColIdx < 0){
1657  TExcept::Throw("Grouping: Either use physical row ids, or have an id column");
1658  }
1659  GroupingSanityCheck(GroupBy, atStr);
1660  if (All) {
1661  // Optimize for the common and most expensive case - iterate over all valid rows.
1662  for (TRowIterator it = BegRI(); it < EndRI(); it++) {
1663  TInt idx = UsePhysicalIds ? it.GetRowIdx() : it.GetIntAttr(IdColIdx);
1664  UpdateGrouping<TInt>(Grouping, it.GetStrMapByName(GroupBy), idx);
1665  }
1666  } else {
1667  // Consider only rows in IndexSet.
1668  for (TInt i = 0; i < IndexSet.Len(); i++) {
1669  if (IsRowValid(IndexSet[i])) {
1670  TInt RowIdx = IndexSet[i];
1671  TInt ColIdx = GetColIdx(GroupBy);
1672  TInt idx = UsePhysicalIds ? RowIdx : IntCols[IdColIdx][RowIdx];
1673  UpdateGrouping<TInt>(Grouping, StrColMaps[ColIdx][RowIdx], idx);
1674  }
1675  }
1676  }
1677 }
1678 
1679 template <class T>
1680 void TTable::UpdateGrouping(THash<T,TIntV>& Grouping, T Key, TInt Val) const{
1681  if (Grouping.IsKey(Key)) {
1682  Grouping.GetDat(Key).Add(Val);
1683  } else {
1684  TIntV NewGroup;
1685  NewGroup.Add(Val);
1686  Grouping.AddDat(Key, NewGroup);
1687  }
1688 }
1689 
1690 #ifdef GCC_ATOMIC
1691 template <class T>
1692 void TTable::UpdateGrouping(THashMP<T,TIntV>& Grouping, T Key, TInt Val) const{
1693  if (Grouping.IsKey(Key)) {
1694  //printf("y\n");
1695  Grouping.GetDat(Key).Add(Val);
1696  } else {
1697  //printf("n\n");
1698  TIntV NewGroup;
1699  NewGroup.Add(Val);
1700  Grouping.AddDat(Key, NewGroup);
1701  }
1702 }
1703 #endif // GCC_ATOMIC
1704 
1705 /*
1706 template<class T>
1707 void TTable::RegisterGrouping(const T& Grouping, const TStr& GroupByCol, TBool UsePhysicalIds){
1708  TStrV GroupByVec;
1709  GroupByVec.Add(GroupByCol);
1710  GroupStmt Stmt(NormalizeColNameV(GroupByVec), true, UsePhysicalIds);
1711  GroupMapping.AddKey(Stmt);
1712  for(T::TIter it = Grouping.BegI(); it < Grouping.EndI(); it++){
1713  GroupMapping.GetDat(Stmt).AddDat(it.GetKey(), TIntV(it.GetDat()));
1714  }
1715 }
1716 */
1717 
1718 #endif //TABLE_H
1719 
Definition: bd.h:440
void UpdateGrouping(THash< T, TIntV > &Grouping, T Key, TInt Val) const
Template for utility function to update a grouping hash map.
Definition: table.h:1680
Definition: table.h:259
TStr GetDstCol() const
Gets the name of the column to be used as dst nodes in the graph.
Definition: table.h:1165
TSize GetMemUsedKB()
Returns approximate memory used by table in [KB].
Definition: table.cpp:3940
void ThresholdJoinInputCorrectness(const TStr &KeyCol1, const TStr &JoinCol1, const TTable &Table, const TStr &KeyCol2, const TStr &JoinCol2)
Definition: table.cpp:2478
void AddSchemaCol(const TStr &ColName, TAttrType ColType)
Adds column with name ColName and type ColType to the schema.
Definition: table.h:642
TFlt GetFltAttr(TInt ColIdx) const
Returns value of floating point attribute specified by float column index for current row...
Definition: table.cpp:159
TBool Valid
Definition: table.h:271
TInt RequestIndexInt(const TStr &ColName)
Creates Index for Int Column ColName.
Definition: table.cpp:5476
Definition: table.h:259
TBool IsLastGraphOfSequence()
Checks if the end of the graph sequence is reached.
Definition: table.cpp:3685
Main namespace for all the Snap global entities.
Definition: alg.h:1
TBool IsAttr(const TStr &Attr)
Checks if Attr is an attribute of this table schema.
Definition: table.cpp:4628
void SetFltVal(TStr VarName, TFlt VarVal)
Set flt variable value in the predicate or all the children that use it.
Definition: table.h:100
void Order(const TStrV &OrderBy, TStr OrderColName="", TBool ResetRankByMSC=false, TBool Asc=true)
Orders the rows according to the values in columns of OrderBy (in descending lexicographic order)...
Definition: table.cpp:3240
TInt GetNumRows() const
Gets total number of rows in this table.
Definition: table.h:1232
void FillBucketsByInterval(TStr SplitAttr, TIntPrV SplitIntervals)
Fills RowIdBuckets with sets of row ids.
Definition: table.cpp:3599
void RemoveRow(TInt RowIdx, TInt PrevRowIdx)
Removes row with id RowIdx.
Definition: table.cpp:1135
Definition: table.h:259
void SelectAtomicConst(const TStr &Col, const T &Val, TPredComp Cmp, PTable &SelectedTable)
Definition: table.h:1296
void AddInt(const TInt &Val)
Adds int attribute to this row.
Definition: table.h:243
TStrV EdgeAttrV
List of columns (attributes) to serve as edge attributes.
Definition: table.h:591
THash< GroupStmt, THash< TGroupKey, TIntV > > GroupMapping
Maps grouping statements to their (group-by key –> group id) mapping.
Definition: table.h:581
TInt FirstValidRow
Physical index of first valid row.
Definition: table.h:553
int GetPrimHashCd() const
Returns primary hash code of the vector. Used by THash.
Definition: ds.h:999
TStr DenormalizeColName(const TStr &ColName) const
Removes the suffix from the column name, if one exists.
Definition: table.cpp:4648
int Len() const
Definition: dt.h:490
TInt GetPivot(TIntV &V, TInt StartIdx, TInt EndIdx, const TVec< TAttrType > &SortByTypes, const TIntV &SortByIndices, TBool Asc)
Gets pivot element for QSort.
Definition: table.cpp:3110
TInt GetColIdx(const TStr &ColName) const
Gets index of column ColName among columns of the same type in the schema.
Definition: table.h:1013
enum TAttrType_ TAttrType
Types for tables, sparse and dense attributes.
static PTable New(TTableContext *Context)
Definition: table.h:933
void StoreGroupCol(const TStr &GroupColName, const TVec< TPair< TInt, TInt > > &GroupAndRowIds)
Parallel helper function for grouping. - we currently don't support such parallel grouping by complex...
Definition: table.cpp:1310
static const TInt Last
Special value for Next vector entry - last row in table.
Definition: table.h:486
PTable UnionAll(const TTable &Table)
Returns union of this table with given Table, preserving duplicates.
Definition: table.cpp:4511
TStrV GetStrVals() const
Gets string attributes of this row.
Definition: table.h:253
static TInt PartitionKeyVal(TIntV &Key, TIntV &Val, TInt Start, TInt End)
Definition: table.cpp:5355
Primitive class: Wrapper around primitive data types.
Definition: table.h:211
bool operator==(const TRowIterator &RowI) const
Checks if this iterator points to the same row pointed by RowI.
Definition: table.cpp:147
TStrV GetSrcNodeIntAttrV() const
Gets src node int attribute name vector.
Definition: table.cpp:1005
PTable Minus(const PTable &Table)
Definition: table.h:1425
void PrintGrouping(const THash< TGroupKey, TIntV > &Grouping) const
Definition: table.cpp:1788
Schema Sch
Table Schema.
Definition: table.h:549
void SelectFirstNRows(const TInt &N)
Selects first N rows from the table.
Definition: table.cpp:3357
TStrV GetDstNodeStrAttrV() const
Gets dst node str attribute name vector.
Definition: table.cpp:1082
Definition: ds.h:130
void GetPartitionRanges(TIntPrV &Partitions, TInt NumPartitions) const
Partitions the table into NumPartitions and populate Partitions with the ranges.
Definition: table.cpp:1177
TInt GetIntAttr(TInt ColIdx) const
Returns value of integer attribute specified by integer column index for current row.
Definition: table.cpp:155
TPredComp
Comparison operators for selection predicates.
Definition: table.h:7
TStr GetStr(const TInt &KeyId) const
Returns a string with KeyId.
Definition: table.h:204
TPredicateNode(TPredOp Opr)
Constructor for logical operation predicate node (internal node)
Definition: table.h:66
void Defrag()
Releases memory of deleted rows, and defrags.
Definition: table.cpp:3311
PGraphMP ToGraphMP(PTable Table, const TStr &SrcCol, const TStr &DstCol)
Performs table to graph conversion in parallel using the sort-first algorithm. This is the recommende...
Definition: conv.h:192
PNEANet ToVarGraphSequenceIterator(TStr SplitAttr, TAttrAggr AggrPolicy, TIntPrV SplitIntervals)
Creates the graph sequence one at a time.
Definition: table.cpp:3671
void SaveBin(const TStr &OutFNm)
Saves table schema and content to a binary file.
Definition: table.cpp:849
TStr GetStrAttr(TInt ColIdx) const
Returns value of string attribute specified by string column index for current row.
Definition: table.cpp:163
void AddIntCol(const TStr &ColName)
Adds an integer column with name ColName.
Definition: table.cpp:4673
THash< TStr, TPair< TAttrType, TInt > > ColTypeMap
Definition: table.h:564
TStr Rvar
Right variable of the comparison op.
Definition: table.h:21
void SetDstCol(const TStr &Dst)
Sets the name of the column to be used as dst nodes in the graph.
Definition: table.h:1167
TInt GetLastValidRowIdx()
Gets the id of the last valid row of the table.
static const int Mx
Definition: dt.h:1142
Definition: table.h:257
static PTable New(const THash< TInt, TInt > &H, const TStr &Col1, const TStr &Col2, TTableContext *Context, const TBool IsStrKeys=false)
Returns pointer to a table constructed from given int->int hash.
Definition: table.h:938
void ThresholdJoinCountCollisions(const TTable &TB, const TTable &TS, const TIntIntVH &T, TInt JoinColIdxB, TInt KeyColIdxB, TInt KeyColIdxS, THash< TIntPr, TIntTr > &Counters, TBool ThisIsSmaller, TAttrType JoinColType, TAttrType KeyType)
Definition: table.cpp:2506
void AddGraphAttributeV(TStrV &Attrs, TBool IsEdge, TBool IsSrc, TBool IsDst)
Adds vector of names of columns to be used as graph attributes.
Definition: table.cpp:992
void GroupByIntColMP(const TStr &GroupBy, THashMP< TInt, TIntV > &Grouping, TBool UsePhysicalIds=true) const
Groups/hashes by a single column with integer values, using OpenMP multi-threading.
Definition: table.cpp:1225
void SetFltColToConstMP(TInt UpdateColIdx, TFlt DefaultFltVal)
Definition: table.cpp:4152
TBool IsValid()
Definition: table.h:286
void ThresholdJoinCountPerJoinKeyCollisions(const TTable &TB, const TTable &TS, const TIntIntVH &T, TInt JoinColIdxB, TInt KeyColIdxB, TInt KeyColIdxS, THash< TIntTr, TIntTr > &Counters, TBool ThisIsSmaller, TAttrType JoinColType, TAttrType KeyType)
Definition: table.cpp:2557
void ColAdd(const TStr &Attr1, const TStr &Attr2, const TStr &ResultAttrName="")
Performs columnwise addition. See TTable::ColGenericOp.
Definition: table.cpp:4816
TArithOp
Possible column-wise arithmetic operations.
Definition: table.h:259
TInt RequestIndexStrMap(const TStr &ColName)
Creates Index for Str Column ColName.
Definition: table.cpp:5514
TFlt GetNextFltAttr(TInt ColIdx) const
Returns value of float attribute specified by float column index for next row.
Definition: table.cpp:252
TSizeTy Len() const
Returns the number of elements in the vector.
Definition: ds.h:575
void AddSelectedRows(const TTable &Table, const TIntV &RowIDs)
Adds rows from Table that correspond to ids in RowIDs.
Definition: table.cpp:4399
Definition: table.h:259
void SetSrcCol(const TStr &Src)
Sets the name of the column to be used as src nodes in the graph.
Definition: table.h:1160
TRowIteratorWithRemove(const TRowIteratorWithRemove &RowI)
Copy constructor.
Definition: table.h:387
TStr IdColName
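Name of the row id column (looked up via GetColIdx(IdColName) during grouping). The description "a mapping from column name to column type and column index among columns of the same type" belongs to ColTypeMap, not to this member.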
Definition: table.h:565
Predicate - encapsulates comparison operations.
Definition: table.h:82
TBool CompareAtomicConstTStr(TInt ColIdx, const TStr &Val, TPredComp Cmp)
Compares value in column ColIdx with given TStr Val.
Definition: table.cpp:208
PTable SelfSimJoinPerGroup(const TStr &GroupAttr, const TStr &SimCol, const TStr &DistanceColName, const TSimType &SimType, const TFlt &Threshold)
Performs join if the distance between two rows is less than the specified threshold.
Definition: table.cpp:2094
static TStrV NormalizeColNameV(const TStrV &Cols)
Adds suffix to column name if it doesn't exist.
Definition: table.h:539
static TInt CompareKeyVal(const TInt &K1, const TInt &V1, const TInt &K2, const TInt &V2)
Definition: table.cpp:5297
const char * GetContextKey(TInt Val) const
Gets the Key of the Context StringVals pool. Used by ToGraph method in conv.cpp.
Definition: table.h:622
void Save(TSOut &SOut)
Saves TTableContext in binary to SOut.
Definition: table.h:197
int GetSecHashCd() const
Returns secondary hash code of the vector. Used by THash.
Definition: ds.h:1011
THash< TStr, THash< TInt, TIntV > > StrMapColIndexes
Indexes for String Columns.
Definition: table.h:569
THash< TStr, THash< TInt, TIntV > > IntColIndexes
Indexes for Int Columns.
Definition: table.h:568
void ColConcat(const TStr &Attr1, const TStr &Attr2, const TStr &Sep="", const TStr &ResAttr="")
Concatenates two string columns.
Definition: table.cpp:5083
TStrV GetSrcNodeStrAttrV() const
Gets src node str attribute name vector.
Definition: table.cpp:1071
void AddNodeAttr(const TStr &Attr)
Handles the common case where src and dst both belong to the same "universe" of entities.
Definition: table.h:1184
TTableContext * Context
Execution Context.
Definition: table.h:545
void AddRow(const TTableRow &Row)
Adds row with values taken from given TTableRow.
Definition: table.h:1002
TSimType
Distance metrics for similarity joins.
Definition: table.h:149
TBool Start
A flag indicating whether the current row is the first valid row of the table.
Definition: table.h:377
void QSort(TIntV &V, TInt StartIdx, TInt EndIdx, const TVec< TAttrType > &SortByTypes, const TIntV &SortByIndices, TBool Asc=true)
Performs QSort on given vector V.
Definition: table.cpp:3154
TAttrType Type
Type of the predicate variables.
Definition: table.h:17
TPredicateNode * Left
Left child of this node.
Definition: table.h:57
THash< TStr, TInt > IntVars
Int variables in the current predicate tree.
Definition: table.h:84
void InvalidateAffectedGroupings(const TStr &Attr)
Definition: table.cpp:1581
void Dump(FILE *OutF=stdout) const
Prints table contents to a text file.
Definition: table.cpp:887
TInt LastValidRow
Physical index of last valid row.
Definition: table.h:554
void UnionAllInPlace(const PTable &Table)
Definition: table.h:1419
TPredicate(TPredicateNode *R)
Construct predicate with given root node R.
Definition: table.h:92
void Group(const TStrV &GroupBy, const TStr &GroupColName, TBool Ordered=true, TBool UsePhysicalIds=true)
Groups rows depending on values of GroupBy columns.
Definition: table.cpp:1569
void SelectAtomicStrConst(const TStr &Col, const TStr &Val, TPredComp Cmp, PTable &SelectedTable)
Definition: table.h:1319
void ResizeTable(int RowCount)
Resizes the table to hold RowCount rows.
Definition: table.cpp:4330
Iterator over a vector of tables.
Definition: table.h:423
void PrintContextSize()
Definition: table.cpp:3959
bool HasNext()
Checks if iterator has reached end of the sequence.
Definition: table.h:432
TPredicate()
Default constructor.
Definition: table.h:90
TPrimitive()
Definition: table.h:219
TPrimitive(const TPrimitive &Prim)
Definition: table.h:223
static TInt GetMP()
Definition: table.h:527
TTableContext()
Default constructor.
Definition: table.h:187
TAttrAggr
Possible policies for aggregating node attributes.
Definition: table.h:257
void ColDiv(const TStr &Attr1, const TStr &Attr2, const TStr &ResultAttrName="")
Performs columnwise division. See TTable::ColGenericOp.
Definition: table.cpp:4828
Definition: fl.h:384
void Rename(const TStr &Column, const TStr &NewLabel)
Renames a column.
Definition: table.cpp:1105
void GroupAux(const TStrV &GroupBy, THash< TGroupKey, TPair< TInt, TIntV > > &Grouping, TBool Ordered, const TStr &GroupColName, TBool KeepUnique, TIntV &UniqueVec, TBool UsePhysicalIds=true)
Helper function for grouping.
Definition: table.cpp:1322
TStrV GetEdgeFltAttrV() const
Gets edge float attribute name vector.
Definition: table.cpp:1060
Definition: table.h:149
TStr GetNextStrAttr(TInt ColIdx) const
Returns value of string attribute specified by string column index for next row.
Definition: table.cpp:256
Execution context.
Definition: table.h:180
const TDat & GetDat(const TKey &Key) const
Definition: hash.h:262
static PTable New(const PTable Table)
Returns pointer to a new table created from given Table.
Definition: table.h:948
void GenerateColTypeMap(THash< TStr, TPair< TInt, TInt > > &ColTypeIntMap)
Definition: table.cpp:337
void AddRightChild(TPredicateNode *Child)
Add right child to this node.
Definition: table.h:74
static PTable TableFromHashMap(const THash< TInt, TInt > &H, const TStr &Col1, const TStr &Col2, TTableContext *Context, const TBool IsStrKeys=false)
Builds table from hash table of int->int.
Definition: table.h:988
Schema GetSchema()
Gets the schema of this table.
Definition: table.h:1125
TFltV GetFltVals() const
Gets float attributes of this row.
Definition: table.h:251
TVec< TIntV > RowIdBuckets
Partitioning of row ids into buckets corresponding to different graph objects when generating a seque...
Definition: table.h:599
TRowIteratorWithRemove BegRIWR()
Gets iterator with remove to the first valid row.
Definition: table.h:1245
TInt GetNumValidRows() const
Gets number of valid, i.e. not deleted, rows in this table.
Definition: table.h:1234
TStr GetStr(const TInt &KeyId) const
Gets the string with KeyId.
Definition: table.h:1109
TRowIterator BegRI() const
Gets iterator to the first valid row of the table.
Definition: table.h:1241
TPredicateNode()
Default constructor.
Definition: table.h:60
Definition: table.h:7
PNEANet ToGraphPerGroupIterator(TStr GroupAttr, TAttrAggr AggrPolicy)
Creates the graph sequence one at a time.
Definition: table.cpp:3676
TVec< TIntV > IntCols
Data columns of integer attributes.
Definition: table.h:558
void SelectAtomicConst(const TStr &Col, const T &Val, TPredComp Cmp)
Definition: table.h:1290
Iterator class for TTable rows, that allows logical row removal while iterating.
Definition: table.h:374
TSizeTy GetMemUsed() const
Returns the memory footprint (the number of bytes) of the vector.
Definition: ds.h:511
void CheckAndAddIntNode(PNEANet Graph, THashSet< TInt > &NodeVals, TInt NodeId)
Checks if given NodeId is seen earlier; if not, add it to Graph and hashmap NodeVals.
Definition: table.cpp:3388
TVec< PNEANet > ToGraphSequence(TStr SplitAttr, TAttrAggr AggrPolicy, TInt WindowSize, TInt JumpSize, TInt StartVal=TInt::Mn, TInt EndVal=TInt::Mx)
Creates a sequence of graphs based on values of column SplitAttr and windows specified by JumpSize an...
Definition: table.cpp:3651
void GroupByFltCol(const TStr &GroupBy, T &Grouping, const TIntV &IndexSet, TBool All, TBool UsePhysicalIds=true) const
Groups/hashes by a single column with float values. Returns hash table with grouping.
Definition: table.h:1626
TInt GetStrMapByName(const TStr &Col) const
Returns integer mapping of string attribute specified by attribute name for current row...
Definition: table.cpp:181
PTable Minus(TTable &Table)
Returns table with rows that are present in this table but not in given Table.
Definition: table.cpp:4592
static PTable GetNodeTable(const PNEANet &Network, TTableContext *Context)
Extracts node TTable from PNEANet.
Definition: table.cpp:3689
THash< TStr, TStr > StrVars
String variables in the current predicate tree.
Definition: table.h:86
TIntV GetStrRowIdxByMap(const TStr &ColName, const TInt &Map) const
Gets the rows containing int mapping Map in str column ColName.
Definition: table.cpp:5431
TIntV GetIntVals() const
Gets int attributes of this row.
Definition: table.h:249
TStr GetIdColName() const
Gets name of the id column of this table.
Definition: table.h:636
static TBool EvalStrAtom(const TStr &Val1, const TStr &Val2, TPredComp Cmp)
Compare atomic string values Val1 and Val2 using predicate Cmp.
Definition: table.h:123
Definition: gbase.h:23
TTable(const TTable &Table)
Copy constructor.
Definition: table.h:919
TRowIteratorWithRemove()
Default constructor.
Definition: table.h:380
int GetSecHashCd() const
Definition: ds.h:157
static void LoadSSSeq(PTable &NewTable, const Schema &S, const TStr &InFNm, const TIntV &RelevantCols, const char &Separator, TBool HasTitleLine)
Sequentially loads data from input file at InFNm into NewTable.
Definition: table.cpp:669
Definition: table.h:7
Definition: dt.h:1386
void AddEdgeAttr(const TStr &Attr)
Adds column to be used as graph edge attribute.
Definition: table.h:1172
TStr GetStrValIdx(TInt ColIdx, TInt RowIdx) const
Gets the value in column with id ColIdx at row RowIdx.
Definition: table.h:626
TRowIterator(const TRowIterator &RowI)
Copy constructor.
Definition: table.h:339
TStr StrVal
Definition: table.h:215
Definition: fl.h:58
void IncrementNext()
Increments the next vector and sets last, NumRows and NumValidRows.
Definition: table.cpp:2255
PTable SimJoin(const TStrV &Cols1, const TTable &Table, const TStrV &Cols2, const TStr &DistanceColName, const TSimType &SimType, const TFlt &Threshold)
Performs join if the distance between two rows is less than the specified threshold.
Definition: table.cpp:1994
void InitIds()
Adds explicit row ids, initializes hash set mapping ids to physical rows.
Definition: table.cpp:1883
TStrTrV CommonNodeAttrs
List of attribute pairs with values common to source and destination and their common given name...
Definition: table.h:594
void QSortPar(TIntV &V, const TVec< TAttrType > &SortByTypes, const TIntV &SortByIndices, TBool Asc=true)
Performs QSort in parallel on given vector V.
Definition: table.cpp:3206
void Save(TSOut &SOut)
Saves table schema and content to a binary format.
Definition: table.cpp:854
PTable Join(const TStr &Col1, const PTable &Table, const TStr &Col2)
Definition: table.h:1360
TBool Result
Result of evaluating the predicate rooted at this node.
Definition: table.h:54
void ReadFltCol(const TStr &ColName, TFltV &Result) const
Reads values of entire float column into Result.
Definition: table.cpp:5221
void InvalidatePhysicalGroupings()
Definition: table.cpp:1577
TBool operator==(const GroupStmt &stmt) const
Definition: table.h:278
TPair< TIntV, TFltV > TGroupKey
Represents grouping key with IntV for integer and string attributes and FltV for float attributes...
Definition: table.h:145
Iterator class for TTable rows.
Definition: table.h:330
TInt GetNextRowIdx() const
Gets physical index of next row.
Definition: table.cpp:243
void DelKey(const TKey &Key)
Definition: hash.h:404
static const int Mn
Definition: dt.h:1141
void Aggregate(const TStrV &GroupByAttrs, TAttrAggr AggOp, const TStr &ValAttr, const TStr &ResAttr, TBool Ordered=true)
Aggregates values of ValAttr after grouping with respect to GroupByAttrs. Results are stored as new at...
Definition: table.cpp:1585
TAttrType GetSchemaColType(TInt Idx) const
Gets type of the column with index Idx in the schema.
Definition: table.h:640
Definition: table.h:257
PGraph ToGraph(PTable Table, const TStr &SrcCol, const TStr &DstCol, TAttrAggr AggrPolicy)
Sequentially converts the table into a graph with links from nodes in SrcCol to those in DstCol...
Definition: conv.h:8
PTable Intersection(const PTable &Table)
Definition: table.h:1422
void SetIntVal(TStr VarName, TInt VarVal)
Set int variable value in the predicate or all the children that use it.
Definition: table.h:98
TStrV GetEdgeIntAttrV() const
Gets edge int attribute name vector.
Definition: table.cpp:1027
Definition: table.h:149
Table Row (Record)
Definition: table.h:234
TRowIteratorWithRemove(TInt RowIdx, TTable *TablePtr, TBool IsStart)
Constructs iterator pointing to given row.
Definition: table.h:384
void SetStrVal(TStr VarName, TStr VarVal)
Set str variable value in the predicate or all the children that use it.
Definition: table.h:102
void RemoveNext()
Removes next row.
Definition: table.cpp:278
int GetPrimHashCd() const
Definition: table.h:303
TStr StrConst
Str const value if this object is a string constant.
Definition: table.h:24
TVec< PNEANet > ToGraphPerGroup(TStr GroupAttr, TAttrAggr AggrPolicy)
Creates a sequence of graphs based on grouping specified by GroupAttr.
Definition: table.cpp:3662
void AddColType(const TStr &ColName, TAttrType ColType, TInt Index)
Adds column with name ColName and type ColType to the ColTypeMap.
Definition: table.h:656
const TTable * Table
Reference to table containing this row.
Definition: table.h:332
int LoadCrossNet(TCrossNet &Graph, PTable Table, const TStr &SrcCol, const TStr &DstCol, TStrV &EdgeAttrV)
Loads the edges from the TTable and EdgeAttrV specifies columns containing edge attributes.
Definition: conv.cpp:69
void Sort(const bool &Asc=true)
Sorts the elements of the vector.
Definition: ds.h:1318
void SelectAtomicIntConst(const TStr &Col, const TInt &Val, TPredComp Cmp)
Definition: table.h:1309
static void Throw(const TStr &MsgStr)
Definition: ut.h:187
Schema DenormalizeSchema() const
Removes suffix from column names in the Schema.
Definition: table.cpp:4665
void AddDstNodeAttr(const TStr &Attr)
Adds column to be used as dst node attribute of the graph.
Definition: table.h:1180
TBool UsePhysicalRowIds
Definition: table.h:270
PNEANet NextGraphIterator()
Calls to this must be preceded by a call to one of the above ToGraph*Iterator functions.
Definition: table.cpp:3681
TInt IntVal
Definition: table.h:213
friend class TRowIterator
Definition: table.h:1526
TStr GetSrcCol() const
Gets the name of the column to be used as src nodes in the graph.
Definition: table.h:1158
PNEANet BuildGraph(const TIntV &RowIds, TAttrAggr AggrPolicy)
Makes a single pass over the rows in the given row id set, and creates nodes, edges, assigns node and edge attributes.
Definition: table.cpp:3445
PGraphMP ToNetworkMP(PTable Table, const TStr &SrcCol, const TStr &DstCol, TStrV &SrcAttrV, TStrV &DstAttrV, TStrV &EdgeAttrV, TAttrAggr AggrPolicy)
Does Table to Network conversion in parallel using the sort-first algorithm. This is the recommended ...
Definition: conv.h:696
TBool EvalAtomicPredicate(const TAtomicPredicate &Atom)
Evaluates the given atomic predicate.
Definition: table.cpp:102
void ColSub(const TStr &Attr1, const TStr &Attr2, const TStr &ResultAttrName="")
Performs columnwise subtraction. See TTable::ColGenericOp.
Definition: table.cpp:4820
TFlt GetFltValAtRowIdx(const TInt &ColIdx, const TInt &RowIdx)
Get the float value at column ColIdx and row RowIdx.
Definition: table.h:1120
int GetEmptyRowsStart(int NewRows)
Gets the start index to a chunk of empty rows of size NewRows.
Definition: table.cpp:4376
TSize GetMemUsed() const
Definition: table.h:294
void PrintSize()
Definition: table.cpp:3930
TStrV GroupByAttrs
Definition: table.h:268
THash< TStr, THash< TFlt, TIntV > > FltColIndexes
Indexes for Float Columns.
Definition: table.h:570
TStr Lvar
Left variable of the comparison op.
Definition: table.h:20
const char * GetKey(const int &KeyId) const
Definition: hash.h:893
void ProjectInPlace(const TStrV &ProjectCols)
Keeps only the columns specified in ProjectCols.
Definition: table.cpp:5239
TAtomicPredicate(TAttrType Typ, TBool IsCnst, TPredComp Cmp, TStr L, TStr R)
Compact prototype for constructing non-const atomic predicate.
Definition: table.h:42
TStr GetStr() const
Definition: table.h:228
Definition: table.h:7
void SelectAtomicStrConst(const TStr &Col, const TStr &Val, TPredComp Cmp)
Definition: table.h:1316
TBool CompareAtomicConst(TInt ColIdx, const TPrimitive &Val, TPredComp Cmp)
Compares value in column ColIdx with given primitive Val.
Definition: table.cpp:190
TRowIteratorWithRemove EndRIWR()
Gets iterator with remove to the last valid row.
Definition: table.h:1247
TFltV FltVals
Values of the flt columns for this row.
Definition: table.h:237
size_t TSize
Definition: bd.h:58
TInt GetStrMapById(TInt ColIdx, TInt RowIdx) const
Gets the integer mapping of the string at column ColIdx at row RowIdx.
Definition: table.h:1033
void SelectAtomic(const TStr &Col1, const TStr &Col2, TPredComp Cmp)
Definition: table.h:1278
void Reindex()
Reinitializes row ids.
Definition: table.cpp:1889
TInt CurrBucket
Current row id bucket - used when generating a sequence of graphs using an iterator.
Definition: table.h:600
PTable IsNextK(const TStr &OrderCol, TInt K, const TStr &GroupBy, const TStr &RankColName="")
Distance based filter.
Definition: table.cpp:3891
TAttrType GetColType(const TStr &ColName) const
Gets type of column ColName.
Definition: table.h:1227
TVec< TIntV > StrColMaps
Data columns of integer mappings of string attributes.
Definition: table.h:560
TRowIteratorWithRemove & Next()
Increments the iterator (For Python compatibility).
Definition: table.cpp:222
void LoadShM(TShMIn &ShMIn, bool SharedPool=true)
Load hash from shared memory. If shared pool is true load pool from shared memory.
Definition: hash.h:815
TPredicate(const TPredicate &Pred)
Copy constructor.
Definition: table.h:94
PNEANet ToGraphSequenceIterator(TStr SplitAttr, TAttrAggr AggrPolicy, TInt WindowSize, TInt JumpSize, TInt StartVal=TInt::Mn, TInt EndVal=TInt::Mx)
Creates the graph sequence one at a time.
Definition: table.cpp:3666
TPrimitive(const TFlt &Val)
Definition: table.h:221
PTable SelfJoin(const TStr &Col)
Joins table with itself, on values of Col.
Definition: table.h:1366
Definition: table.h:149
void GroupByIntCol(const TStr &GroupBy, T &Grouping, const TIntV &IndexSet, TBool All, TBool UsePhysicalIds=true) const
Groups/hashes by a single column with integer values.
Definition: table.h:1598
PTable Join(const TStr &Col1, const TTable &Table, const TStr &Col2)
Performs equijoin.
Definition: table.cpp:2272
static int GetHashCd(const int hc1, const int hc2)
Definition: bd.h:590
PGraph ToNetwork(PTable Table, const TStr &SrcCol, const TStr &DstCol, TStrV &SrcAttrV, TStrV &DstAttrV, TStrV &EdgeAttrV, TAttrAggr AggrPolicy)
Converts the Table into a graph with edges from SrcCol to DstCol, and attribute vector defined by the...
Definition: conv.h:64
void Save(TSOut &SOut, bool PoolToo=true) const
Definition: hash.h:833
bool IsKey(const TKey &Key) const
Definition: hashmp.h:191
bool Val
Definition: dt.h:976
static void LoadSSPar(PTable &NewTable, const Schema &S, const TStr &InFNm, const TIntV &RelevantCols, const char &Separator, TBool HasTitleLine)
Loads data in parallel from input file at InFNm into NewTable. Only works when NewTable has no string c...
Definition: table.cpp:507
int LoadMode(TModeNet &Graph, PTable Table, const TStr &NCol, TStrV &NodeAttrV)
Loads the nodes specified in column NCol from the TTable with the attributes specified in NodeAttrV...
Definition: conv.cpp:14
TPair< TStr, TAttrType > TStrTypPr
Definition: table.h:1530
TIntV GetIntRowIdxByVal(const TStr &ColName, const TInt &Val) const
Gets the rows containing Val in int column ColName.
Definition: table.cpp:5410
TFlt FltVal
Definition: table.h:214
TInt GetRowIdx() const
Gets the id of the row pointed by this iterator.
Definition: table.cpp:151
A class representing a cached grouping statement identifier.
Definition: table.h:266
TStr GetSchemaColName(TInt Idx) const
Gets name of the column with index Idx in the schema.
Definition: table.h:638
TInt GetStrMapById(TInt ColIdx) const
Returns integer mapping of a string attribute value specified by string column index for current row...
Definition: table.cpp:186
TBool UsePhysicalIds()
Definition: table.h:277
TStrV SrcNodeAttrV
List of columns (attributes) to serve as source node attributes.
Definition: table.h:592
TAttrAggr AggrPolicy
Aggregation policy used for solving conflicts between different values of an attribute of the same no...
Definition: table.h:601
static void QSortKeyVal(TIntV &Key, TIntV &Val, TInt Start, TInt End)
Definition: table.cpp:5378
void Select(TPredicate &Predicate, TIntV &SelectedRows, TBool Remove=true)
Selects rows that satisfy given Predicate.
Definition: table.cpp:2750
PTable UnionAll(const PTable &Table)
Definition: table.h:1416
void UnionAllInPlace(const TTable &Table)
Same as TTable::ConcatTable.
Definition: table.cpp:4524
void AddRowI(const TRowIterator &RI)
Adds row corresponding to RI.
Definition: table.cpp:4295
TInt GetInt() const
Definition: table.h:226
char GetCh(const int &ChN) const
Definition: dt.h:486
TIntIntH RowIdMap
Mapping of permanent row ids to physical id.
Definition: table.h:566
void SaveSS(const TStr &OutFNm)
Saves table schema and content to a TSV file.
Definition: table.cpp:800
PTable Union(const TTable &Table)
Returns union of this table with given Table.
Definition: table.cpp:4531
void SelectAtomicConst(const TStr &Col, const TPrimitive &Val, TPredComp Cmp, TIntV &SelectedRows, PTable &SelectedTable, TBool Remove=true, TBool Table=true)
Selects rows where the value of Col matches given primitive Val.
Definition: table.cpp:2873
Definition: table.h:5
void UpdateFltFromTable(const TStr &KeyAttr, const TStr &UpdateAttr, const TTable &Table, const TStr &FKeyAttr, const TStr &ReadAttr, TFlt DefaultFltVal=0.0)
Definition: table.cpp:4242
void ColConcatConst(const TStr &Attr1, const TStr &Val, const TStr &Sep="", const TStr &ResAttr="")
Concatenates column values with given string value.
Definition: table.cpp:5182
Definition: fl.h:128
void GetCollidingRows(const TTable &T, THashSet< TInt > &Collisions)
Gets set of row ids of rows common with table T.
Definition: table.cpp:4014
void AddGraphAttribute(const TStr &Attr, TBool IsEdge, TBool IsSrc, TBool IsDst)
Adds names of columns to be used as graph attributes.
Definition: table.cpp:985
TBool Ordered
Definition: table.h:269
void KeepSortedRows(const TIntV &KeepV)
Removes all rows that are not mentioned in the SORTED vector KeepV.
Definition: table.cpp:1152
The nodes of one particular mode in a TMMNet, and their neighbor vectors as TIntV attributes...
Definition: mmnet.h:23
TPair< TAttrType, TInt > GetColTypeMap(const TStr &ColName) const
Gets column type and index of ColName.
Definition: table.h:666
TTableRow()
Default constructor.
Definition: table.h:241
TAttrType GetType() const
Definition: table.h:229
Definition: table.h:7
void GroupingSanityCheck(const TStr &GroupBy, const TAttrType &AttrType) const
Checks if grouping key exists and matches given attr type.
Definition: table.cpp:1215
TStrHash< TInt, TBigStrPool > StringVals
StringPool - stores string data values and maps them to integers.
Definition: table.h:182
static PTable TableFromHashMap(const THash< TInt, TFlt > &H, const TStr &Col1, const TStr &Col2, TTableContext *Context, const TBool IsStrKeys=false)
Builds table from hash table of int->float.
Definition: table.h:995
void UpdateTableForNewRow()
Updates table state after adding one or more rows.
Definition: table.cpp:4140
void SetCommonNodeAttrs(const TStr &SrcAttr, const TStr &DstAttr, const TStr &CommonAttrName)
Sets the columns to be used as both src and dst node attributes.
Definition: table.h:1188
int AddKey(const char *Key)
Definition: hash.h:968
static TInt UseMP
Global switch for choosing multi-threaded versions of TTable functions.
Definition: table.h:489
TRowIterator()
Default constructor.
Definition: table.h:335
TPredComp Compare
Comparison op represented by this node.
Definition: table.h:19
TTableIterator(TVec< PTable > &PTableV)
Constructs iterator over the given vector of tables PTableV.
Definition: table.h:428
void DelColType(const TStr &ColName)
Removes column with name ColName from the ColTypeMap.
Definition: table.h:661
Definition: dt.h:1137
void ReadIntCol(const TStr &ColName, TIntV &Result) const
Reads values of entire int column into Result.
Definition: table.cpp:5212
int GetPrimHashCd() const
Definition: ds.h:156
void FillBucketsByWindow(TStr SplitAttr, TInt JumpSize, TInt WindowSize, TInt StartVal, TInt EndVal)
Fills RowIdBuckets with sets of row ids.
Definition: table.cpp:3547
static TStr NormalizeColName(const TStr &ColName)
Adds suffix to column name if it doesn't exist.
Definition: table.h:530
void AddStrCol(const TStr &ColName)
Adds a string column with name ColName.
Definition: table.cpp:4687
THash< TStr, GroupStmt > GroupStmtNames
Maps user-given grouping statement names to their group-by attributes.
Definition: table.h:573
TTableContext(TSIn &SIn)
Loads TTableContext in binary from SIn.
Definition: table.h:189
TRowIterator & Next()
Increments the iterator (For Python compatibility).
Definition: table.cpp:135
static PTable Load(TSIn &SIn, TTableContext *Context)
Loads table from a binary format.
Definition: table.h:971
TStr SrcCol
Column (attribute) to serve as src nodes when constructing the graph.
Definition: table.h:589
PTable Project(const TStrV &ProjectCols)
Returns table with only the columns in ProjectCols.
Definition: table.cpp:4615
TVec< PTable > PTableV
Vector of TTables which are to be iterated over.
Definition: table.h:424
void StoreStrCol(const TStr &ColName, const TStrV &ColVals)
Adds entire str column to table.
Definition: table.cpp:4121
TPredicateNode * Right
Definition: table.h:58
void LoadShM(TShMIn &ShMIn)
Constructs the vector from a shared memory input.
Definition: ds.h:932
TVec< TFltV > FltCols
Data columns of floating point attributes.
Definition: table.h:559
void AddSrcNodeAttr(TStrV &Attrs)
Adds columns to be used as src node attributes of the graph.
Definition: table.h:1178
TStrV GetDstNodeFltAttrV() const
Gets dst node float attribute name vector.
Definition: table.cpp:1049
TStrV DstNodeAttrV
List of columns (attributes) to serve as destination node attributes.
Definition: table.h:593
TIntV Next
A vector describing the logical order of the rows. Next[i] is the successor of row i; table iterators follow the order dictated by Next.
Definition: table.h:555
void AddStr(const TStr &Val)
Adds string attribute to this row.
Definition: table.h:247
TPredicateNode(const TAtomicPredicate &A)
Constructor for atomic predicate node (leaf)
Definition: table.h:63
Definition: ds.h:32
TAtomicPredicate(TAttrType Typ, TBool IsCnst, TPredComp Cmp, TStr L, TStr R, TInt ICnst, TFlt FCnst, TStr SCnst)
Construct predicate from given comparison op, variables and constants.
Definition: table.h:37
void SelectAtomicFltConst(const TStr &Col, const TFlt &Val, TPredComp Cmp, PTable &SelectedTable)
Definition: table.h:1326
int AddKey(const TKey &Key)
Definition: hash.h:373
TRowIterator EndRI() const
Gets iterator to the last valid row of the table.
Definition: table.h:1243
void AddStrVal(const TInt &ColIdx, const TStr &Val)
Adds Val in column with id ColIdx.
Definition: table.cpp:971
TTable * Table
Reference to table containing this row.
Definition: table.h:376
PGraphMP ToGraphMP3(PTable Table, const TStr &SrcCol, const TStr &DstCol)
Performs table to graph conversion in parallel. Uses the hash-first method, which is less optimal...
Definition: conv.h:532
void Load(TSIn &SIn, bool PoolToo=true)
Definition: hash.h:811
TInt NumRows
Number of rows in the table (valid and invalid).
Definition: table.h:551
TFlt GetFltVal(const TStr &ColName, const TInt &RowIdx)
Gets the value of float attribute ColName at row RowIdx.
Definition: table.h:1024
static PTable LoadSS(const Schema &S, const TStr &InFNm, TTableContext *Context, const char &Separator= '\t', TBool HasTitleLine=false)
Loads table from spreadsheet (TSV, CSV, etc). Note: HasTitleLine = true is not supported. Please comment out title lines instead.
Definition: table.cpp:795
TPrimitive(const TInt &Val)
Definition: table.h:220
TStr GetStrVal(const TStr &ColName, const TInt &RowIdx) const
Gets the value of string attribute ColName at row RowIdx.
Definition: table.h:1028
void Unique(const TStr &Col)
Removes rows with duplicate values in given column.
Definition: table.cpp:1266
TRowIteratorWithRemove & operator++(int)
Increments the iterator.
Definition: table.cpp:218
void AddJointRow(const TTable &T1, const TTable &T2, TInt RowIdx1, TInt RowIdx2)
Adds joint row T1[RowIdx1]<=>T2[RowIdx2].
Definition: table.cpp:1957
void Classify(TPredicate &Predicate, const TStr &LabelName, const TInt &PositiveLabel=1, const TInt &NegativeLabel=0)
Definition: table.cpp:2805
void Merge(TIntV &V, TInt Idx1, TInt Idx2, TInt Idx3, const TVec< TAttrType > &SortByTypes, const TIntV &SortByIndices, TBool Asc=true)
Helper function for parallel QSort.
Definition: table.cpp:3178
TStr DstCol
Column (attribute) to serve as dst nodes when constructing the graph.
Definition: table.h:590
void AddSrcNodeAttr(const TStr &Attr)
Adds column to be used as src node attribute of the graph.
Definition: table.h:1176
void ReadStrCol(const TStr &ColName, TStrV &Result) const
Reads values of entire string column into Result.
Definition: table.cpp:5230
void Invalidate()
Definition: table.h:287
static void SetMP(TInt Value)
Definition: table.h:526
static PTable GetEdgeTable(const PNEANet &Network, TTableContext *Context)
Extracts edge TTable from PNEANet.
Definition: table.cpp:3741
GroupStmt(const GroupStmt &stmt)
Definition: table.h:276
void operator()(TVec< TElem > *Node, TShMIn &ShMIn)
Definition: table.h:895
static const TInt Invalid
Special value for Next vector entry - logically removed row.
Definition: table.h:487
void LoadShM(TShMIn &ShMIn)
Loads TTableContext using shared memory, the object is read only.
Definition: table.h:193
TStrV StrVals
Values of the str columns for this row.
Definition: table.h:238
void AddColType(const TStr &ColName, TPair< TAttrType, TInt > ColType)
Adds column with name ColName and type ColType to the ColTypeMap.
Definition: table.h:651
Definition: dt.h:412
PNEANet GetNextGraphFromSequence()
Returns the next graph in sequence corresponding to RowIdBuckets.
Definition: table.cpp:3634
TBool IncludesAttr(const TStr &Attr)
Definition: table.h:288
Definition: table.h:7
TBool CompareAtomicConst(TInt ColIdx, const TPrimitive &Val, TPredComp Cmp)
Compares value in column ColIdx with given primitive Val.
Definition: table.cpp:282
void StoreFltCol(const TStr &ColName, const TFltV &ColVals)
Adds entire flt column to table.
Definition: table.cpp:4104
THash< GroupStmt, THash< TInt, TGroupKey > > GroupIDMapping
Maps grouping statements to their (group id –> group-by key) mapping.
Definition: table.h:577
TInt IntConst
Int const value if this object is an integer constant.
Definition: table.h:22
void AddFlt(const TFlt &Val)
Adds float attribute to this row.
Definition: table.h:245
TTriple< TStr, TStr, TStr > TStrTr
Definition: ds.h:186
Definition: table.h:257
GroupStmt(const TStrV &Attrs, TBool ordered, TBool physical)
Definition: table.h:275
TPredOp Op
Logical op represented by this node.
Definition: table.h:53
void LoadTableShM(TShMIn &ShMIn, TTableContext *ContextTable)
Definition: table.cpp:360
GroupStmt()
Definition: table.h:273
TInt CurrTableIdx
Index of the current table pointed to by this iterator.
Definition: table.h:425
void GroupByStrCol(const TStr &GroupBy, T &Grouping, const TIntV &IndexSet, TBool All, TBool UsePhysicalIds=true) const
Groups/hashes by a single column with string values. Returns hash table with grouping.
Definition: table.h:1653
T AggregateVector(TVec< T > &V, TAttrAggr Policy)
Aggregates vector into a single scalar value according to a policy.
Definition: table.h:1544
TTableContext * ChangeContext(TTableContext *Context)
Changes the current context. Moves all object items to the new context.
Definition: table.cpp:921
TInt CurrRowIdx
Physical row index of current row pointed to by iterator.
Definition: table.h:375
void AddNodeAttr(TStrV &Attrs)
Handles the common case where src and dst both belong to the same "universe" of entities.
Definition: table.h:1186
TPredicateNode * Root
Root node of the current predicate tree.
Definition: table.h:87
Definition: gbase.h:23
Definition: table.h:259
void AggregateCols(const TStrV &AggrAttrs, TAttrAggr AggOp, const TStr &ResAttr)
Aggregates attributes in AggrAttrs across columns.
Definition: table.cpp:1750
bool operator==(const TRowIteratorWithRemove &RowI) const
Checks if this iterator points to the same row pointed by RowI.
Definition: table.cpp:235
Table class: Relational table with columnar data storage.
Definition: table.h:484
bool operator<(const TRowIterator &RowI) const
Checks if this iterator points to a row that is before the one pointed by RowI.
Definition: table.cpp:141
TPredicateNode(const TPredicateNode &P)
Copy constructor.
Definition: table.h:69
TStr GetStrValById(TInt ColIdx, TInt RowIdx) const
Gets the value of the string attribute at column ColIdx at row RowIdx.
Definition: table.h:1043
void SelectAtomicFltConst(const TStr &Col, const TFlt &Val, TPredComp Cmp)
Definition: table.h:1323
void UpdateFltFromTableMP(const TStr &KeyAttr, const TStr &UpdateAttr, const TTable &Table, const TStr &FKeyAttr, const TStr &ReadAttr, TFlt DefaultFltVal=0.0)
Definition: table.cpp:4174
static PTable GetEdgeTablePN(const PNGraphMP &Network, TTableContext *Context)
Extracts edge TTable from parallel graph PNGraphMP.
Definition: table.cpp:3799
void ISort(TIntV &V, TInt StartIdx, TInt EndIdx, const TVec< TAttrType > &SortByTypes, const TIntV &SortByIndices, TBool Asc=true)
Performs insertion sort on given vector V.
Definition: table.cpp:3096
TInt GetRowIdx() const
Gets physical index of current row.
Definition: table.cpp:239
TPredOp
Boolean operators for selection predicates.
Definition: table.h:5
TInt RequestIndexFlt(const TStr &ColName)
Creates Index for Flt Column ColName.
Definition: table.cpp:5495
static TBool EvalAtom(T Val1, T Val2, TPredComp Cmp)
Compare atomic values Val1 and Val2 using predicate Cmp.
Definition: table.h:110
static PTable New(const THash< TInt, TFlt > &H, const TStr &Col1, const TStr &Col2, TTableContext *Context, const TBool IsStrKeys=false)
Returns pointer to a table constructed from given int->float hash.
Definition: table.h:943
bool operator<(const TRowIteratorWithRemove &RowI) const
Checks if this iterator points to a row that is before the one pointed by RowI.
Definition: table.cpp:229
static PTable New(const Schema &S, TTableContext *Context)
Definition: table.h:934
void SelectAtomicIntConst(const TStr &Col, const TInt &Val, TPredComp Cmp, PTable &SelectedTable)
Definition: table.h:1312
void InitRowIdBuckets(int NumBuckets)
Initializes the RowIdBuckets vector which will be used for the graph sequence creation.
Definition: table.cpp:3535
void AddLeftChild(TPredicateNode *Child)
Add left child to this node.
Definition: table.h:72
TStrV GetSrcNodeFltAttrV() const
Gets src node float attribute name vector.
Definition: table.cpp:1038
static PTable GetFltNodePropertyTable(const PNEANet &Network, const TIntFltH &Property, const TStr &NodeAttrName, const TAttrType &NodeAttrType, const TStr &PropertyAttrName, TTableContext *Context)
Extracts node and edge property TTables from THash.
Definition: table.cpp:3852
void ConcatTable(const PTable &T)
Appends all rows of T to this table, and recalculates indices.
Definition: table.h:683
Hash-Table with multiprocessing support.
Definition: hashmp.h:81
PTable ThresholdJoinPerJoinKeyOutputTable(const THash< TIntTr, TIntTr > &Counters, TInt Threshold, const TTable &Table)
Definition: table.cpp:2622
TPrimitive(const TStr &Val)
Definition: table.h:222
PTable ThresholdJoin(const TStr &KeyCol1, const TStr &JoinCol1, const TTable &Table, const TStr &KeyCol2, const TStr &JoinCol2, TInt Threshold, TBool PerJoinKey=false)
Definition: table.cpp:2644
Definition: table.h:257
void Load(TSIn &SIn)
Loads TTableContext in binary from SIn.
Definition: table.h:191
static void ISortKeyVal(TIntV &Key, TIntV &Val, TInt Start, TInt End)
Definition: table.cpp:5321
TBool IsConst
Flag if this atomic node represents a constant value.
Definition: table.h:18
TInt CurrRowIdx
Physical row index of the current row pointed to by the iterator.
Definition: table.h:331
static TInt GetPivotKeyVal(TIntV &Key, TIntV &Val, TInt Start, TInt End)
Definition: table.cpp:5338
TIntV IntVals
Values of the int columns for this row.
Definition: table.h:236
Definition: table.h:7
Definition: bd.h:196
TInt IsNextDirty
Flag to signify whether the rows are stored in logical sequence or reordered. Used for optimizing Get...
Definition: table.h:603
void Select(TPredicate &Predicate)
Definition: table.h:1266
TStrV GetEdgeStrAttrV() const
Gets edge str attribute name vector.
Definition: table.cpp:1094
friend class TRowIteratorWithRemove
Definition: table.h:1527
Definition: table.h:5
void AddFltCol(const TStr &ColName)
Adds a float column with name ColName.
Definition: table.cpp:4680
TInt CompareRows(TInt R1, TInt R2, const TAttrType &CompareByType, const TInt &CompareByIndex, TBool Asc=true)
Returns positive value if R1 is bigger, negative value if R2 is bigger, and 0 if they are equal (strc...
Definition: table.cpp:3064
TStr RenumberColName(const TStr &ColName) const
Returns a re-numbered column name based on number of existing columns with conflicting names...
Definition: table.cpp:4632
TAtomicPredicate()
Default constructor.
Definition: table.h:30
TInt NumValidRows
Number of valid rows in the table (i.e. rows that were not logically removed).
Definition: table.h:552
TTable()
Definition: table.cpp:302
PTable ThresholdJoinOutputTable(const THash< TIntPr, TIntTr > &Counters, TInt Threshold, const TTable &Table)
Definition: table.cpp:2608
TRowIterator(TInt RowIdx, const TTable *TablePtr)
Constructs iterator to row RowIdx of TablePtr.
Definition: table.h:337
void Count(const TStr &CountColName, const TStr &Col)
Counts number of unique elements.
Definition: table.cpp:1802
Definition: table.h:7
PTable InitializeJointTable(const TTable &Table)
Initializes an empty table for the join of this table with the given table.
Definition: table.cpp:1916
Definition: table.h:257
void ColMax(const TStr &Attr1, const TStr &Attr2, const TStr &ResultAttrName="")
Performs max of two columns. See TTable::ColGenericOp.
Definition: table.cpp:4840
TStr GetStrValByName(const TStr &ColName, const TInt &RowIdx) const
Gets the value of the string attribute at column ColName at row RowIdx.
Definition: table.h:1048
void ClassifyAtomic(const TStr &Col1, const TStr &Col2, TPredComp Cmp, const TStr &LabelName, const TInt &PositiveLabel=1, const TInt &NegativeLabel=0)
Definition: table.cpp:2866
bool Cmp(const int &RelOp, const TRec &Rec1, const TRec &Rec2)
Definition: bd.h:426
void StoreIntCol(const TStr &ColName, const TIntV &ColVals)
Adds entire int column to table.
Definition: table.cpp:4087
void AddIdColumn(const TStr &IdColName)
Adds a column of explicit integer identifiers to the rows.
Definition: table.cpp:1900
void Print()
Definition: table.h:317
void GetVariables(TStrV &Variables)
Get variables in the predicate tree rooted at this node.
Definition: table.cpp:1
Definition: table.h:257
static TInt CheckSortedKeyVal(TIntV &Key, TIntV &Val, TInt Start, TInt End)
Definition: table.cpp:5310
void AddEdgeAttributes(PNEANet &Graph, int RowId)
Adds attributes of edge corresponding to RowId to the Graph.
Definition: table.cpp:3395
Definition: table.h:5
Definition: gbase.h:23
TPt< TTable > PTable
Definition: table.h:141
TVec< PNEANet > ToVarGraphSequence(TStr SplitAttr, TAttrAggr AggrPolicy, TIntPrV SplitIntervals)
Creates a sequence of graphs based on values of column SplitAttr and intervals specified by SplitInte...
Definition: table.cpp:3657
PTable Next()
Returns next table in the sequence and updates the iterator.
Definition: table.h:430
Definition: table.h:7
TInt GetNextIntAttr(TInt ColIdx) const
Returns value of integer attribute specified by integer column index for next row.
Definition: table.cpp:248
void ColGenericOp(const TStr &Attr1, const TStr &Attr2, const TStr &ResAttr, TArithOp op)
Performs columnwise arithmetic operation.
Definition: table.cpp:4752
void SelectAtomic(const TStr &Col1, const TStr &Col2, TPredComp Cmp, TIntV &SelectedRows, TBool Remove=true)
Selects rows using atomic compare operation.
Definition: table.cpp:2813
TRowIterator & operator++(int)
Increments the iterator.
Definition: table.cpp:131
bool IsKey(const TKey &Key) const
Definition: hash.h:258
void GetVariables(TStrV &Variables)
Get variables in current predicate.
Definition: table.cpp:10
TSizeTy Add()
Adds a new element at the end of the vector, after its current last element.
Definition: ds.h:602
TAttrType AttrType
Definition: table.h:216
static PTable LoadShM(TShMIn &ShMIn, TTableContext *Context)
Static constructor to load table from memory.
Definition: table.h:975
TDat & AddDat(const TKey &Key)
Definition: hashmp.h:181
void ColMin(const TStr &Attr1, const TStr &Attr2, const TStr &ResultAttrName="")
Performs min of two columns. See TTable::ColGenericOp.
Definition: table.cpp:4836
Definition: dt.h:974
bool IsRowValid(TInt RowIdx) const
Checks if RowIdx corresponds to a valid (i.e. not deleted) row.
Definition: table.h:801
void ColMod(const TStr &Attr1, const TStr &Attr2, const TStr &ResultAttrName="")
Performs columnwise modulus. See TTable::ColGenericOp.
Definition: table.cpp:4832
TCRef CRef
Definition: table.h:550
void RemoveFirstRow()
Removes first valid row of the table.
Definition: table.cpp:1122
bool IsStrIn(const TStr &Str) const
Definition: dt.h:557
TBool IsFirst() const
Checks whether iterator points to first valid row of the table.
Definition: table.cpp:274
Atomic predicate - encapsulates comparison operations.
Definition: table.h:15
TInt GetStrMapByName(const TStr &ColName, TInt RowIdx) const
Gets the integer mapping of the string at column ColName at row RowIdx.
Definition: table.h:1038
TBool IsColName(const TStr &ColName) const
Definition: table.h:646
TInt GetIntValAtRowIdx(const TInt &ColIdx, const TInt &RowIdx)
Get the integer value at column ColIdx and row RowIdx.
Definition: table.h:1116
Definition: table.h:259
TInt CheckAndAddFltNode(T Graph, THash< TFlt, TInt > &NodeVals, TFlt FNodeVal)
Checks if given NodeVal is seen earlier; if not, add it to Graph and hashmap NodeVals.
Definition: table.h:1533
TFlt GetFlt() const
Definition: table.h:227
Predicate node - represents a binary predicate operation on two predicate nodes.
Definition: table.h:51
int Len() const
Definition: hash.h:228
PTable SelfSimJoin(const TStrV &Cols, const TStr &DistanceColName, const TSimType &SimType, const TFlt &Threshold)
Definition: table.h:1367
static PTable New()
Definition: table.h:932
void AddNodeAttributes(TInt NId, TStrV NodeAttrV, TInt RowId, THash< TInt, TStrIntVH > &NodeIntAttrs, THash< TInt, TStrFltVH > &NodeFltAttrs, THash< TInt, TStrStrVH > &NodeStrAttrs)
Takes as parameters, and updates, maps NodeXAttrs: Node Id –> (attribute name –> Vector of attribut...
Definition: table.cpp:3414
GroupStmt(const TStrV &Attrs)
Definition: table.h:274
PNEANet GetFirstGraphFromSequence(TAttrAggr AggrPolicy)
Returns the first graph of the sequence.
Definition: table.cpp:3628
TDat & AddDat(const TKey &Key)
Definition: hash.h:238
void ClassifyAtomicConst(const TStr &Col, const T &Val, TPredComp Cmp, const TStr &LabelName, const TInt &PositiveLabel=1, const TInt &NegativeLabel=0)
Definition: table.h:1301
PTable Intersection(const TTable &Table)
Returns intersection of this table with given Table.
Definition: table.cpp:4567
void AddDstNodeAttr(TStrV &Attrs)
Adds columns to be used as dst node attributes of the graph.
Definition: table.h:1182
void AddNJointRowsMP(const TTable &T1, const TTable &T2, const TVec< TIntPrV > &JointRowIDSet)
Adds rows from T1 and T2 to this table in a parallel manner. Used by Join.
Definition: table.cpp:4442
void AddRowV(const TIntV &IntVals, const TFltV &FltVals, const TStrV &StrVals)
Adds row with values corresponding to the given vectors by type.
Definition: table.cpp:4317
const TDat & GetDat(const TKey &Key) const
Definition: hashmp.h:195
TTableContext * GetContext()
Returns the context.
Definition: table.h:1005
TFlt FltConst
Flt const value if this object is a float constant.
Definition: table.h:23
TBool Eval()
Return the result of evaluating current predicate.
Definition: table.cpp:14
TIntV GetFltRowIdxByVal(const TStr &ColName, const TFlt &Val) const
Gets the rows containing Val in flt column ColName.
Definition: table.cpp:5453
Definition: table.h:259
TSize GetContextMemUsedKB()
Returns approximate memory used by table context in [KB].
Definition: table.cpp:3969
TInt AddStr(const TStr &Key)
Adds string Key to the context, returns its KeyId.
Definition: table.h:199
TPredicateNode * Parent
Parent node of this node.
Definition: table.h:56
TInt GetIntVal(const TStr &ColName, const TInt &RowIdx)
Gets the value of integer attribute ColName at row RowIdx.
Definition: table.h:1020
THash< TInt, TInt > GetRowIdMap() const
Gets a map of logical to physical row ids.
Definition: table.h:1237
void SetFirstValidRow()
Sets the first valid row of the TTable.
Definition: table.h:811
void AddTable(const TTable &T)
Adds all the rows of the input table. Allows duplicate rows (not a union).
Definition: table.cpp:3975
void ColMul(const TStr &Attr1, const TStr &Attr2, const TStr &ResultAttrName="")
Performs columnwise multiplication. See TTable::ColGenericOp.
Definition: table.cpp:4824
void ClassifyAux(const TIntV &SelectedRows, const TStr &LabelName, const TInt &PositiveLabel=1, const TInt &NegativeLabel=0)
Adds a label attribute with positive labels on selected rows and negative labels on the rest...
Definition: table.cpp:4694
THash< TStr, TFlt > FltVars
Float variables in the current predicate tree.
Definition: table.h:85
void AddNRows(int NewRows, const TVec< TIntV > &IntColsP, const TVec< TFltV > &FltColsP, const TVec< TIntV > &StrColMapsP)
Adds NewRows rows from the given vectors for each column type.
Definition: table.cpp:4421
TVec< PTable > SpliceByGroup(const TStrV &GroupByAttrs, TBool Ordered=true)
Splices table into subtables according to a grouping statement.
Definition: table.cpp:1808
PGraphMP ToNetworkMP2(PTable Table, const TStr &SrcCol, const TStr &DstCol, TStrV &SrcAttrV, TStrV &DstAttrV, TStrV &EdgeAttrV, TAttrAggr AggrPolicy)
Implements table to network conversion in parallel. Not the recommended algorithm; use ToNetworkMP instead.
Definition: conv.h:1118
Definition: table.h:257
Definition: table.h:5
void AddEdgeAttr(TStrV &Attrs)
Adds columns to be used as graph edge attributes.
Definition: table.h:1174
TVec< TPair< TStr, TAttrType > > Schema
A table schema is a vector of pairs <attribute name, attribute type>.
Definition: table.h:262
void ColGenericOpMP(TInt ArgColIdx1, TInt ArgColIdx2, TAttrType ArgType1, TAttrType ArgType2, TInt ResColIdx, TArithOp op)
Definition: table.cpp:4708
TVec< PNEANet > GetGraphsFromSequence(TAttrAggr AggrPolicy)
Returns a sequence of graphs.
Definition: table.cpp:3616
TStrV GetDstNodeIntAttrV() const
Gets dst node int attribute name vector.
Definition: table.cpp:1016
PTable Union(const PTable &Table)
Definition: table.h:1413
TAtomicPredicate Atom
Atomic predicate at this node.
Definition: table.h:55
TInt Partition(TIntV &V, TInt StartIdx, TInt EndIdx, const TVec< TAttrType > &SortByTypes, const TIntV &SortByIndices, TBool Asc)
Partitions vector for QSort.
Definition: table.cpp:3126
Implements a single CrossNet consisting of edges between two TModeNets (could be the same TModeNet) ...
Definition: mmnet.h:133
int GetSecHashCd() const
Definition: table.h:310