112 case LT:
return Val1 < Val2;
113 case LTE:
return Val1 <= Val2;
114 case EQ:
return Val1 == Val2;
115 case NEQ:
return Val1 != Val2;
116 case GTE:
return Val1 >= Val2;
117 case GT:
return Val1 > Val2;
118 default:
return false;
125 case LT:
return Val1 < Val2;
126 case LTE:
return Val1 <= Val2;
127 case EQ:
return Val1 == Val2;
128 case NEQ:
return Val1 != Val2;
129 case GTE:
return Val1 >= Val2;
130 case GT:
return Val1 > Val2;
131 case SUBSTR:
return Val2.IsStrIn(Val1);
133 default:
return false;
159 TMetric(
TStr Name) : MetricName(Name) {}
170 class TEuclideanMetric:
public TMetric {
172 TEuclideanMetric(
TStr Name) : TMetric(Name) {}
182 template <
class PGraph>
184 TTableContext* Context,
const double& C,
const double& Eps,
const int& MaxIter);
187 template <
class PGraph>
451 const TStr& SrcCol,
const TStr& DstCol,
459 const TStr& SrcCol,
const TStr& DstCol,
464 const TStr& SrcCol,
const TStr& DstCol,
475 const TStr& SrcCol,
const TStr& DstCol);
477 const TStr& SrcCol,
const TStr& DstCol);
541 TStr Result = ColName;
542 int RLen = Result.
Len();
543 if (RLen == 0) {
return Result; }
544 if (Result.
GetCh(0) ==
'_') {
return Result; }
545 if (RLen >= 2 && Result.
GetCh(RLen-2) ==
'-') {
return Result; }
546 return Result +
"-1";
628 const TInt& PositiveLabel = 1,
const TInt& NegativeLabel= 0);
784 const TInt& CompareByIndex,
TBool Asc =
true);
787 const TIntV& CompareByIndices,
TBool Asc =
true);
796 const TIntV& SortByIndices,
TBool Asc =
true);
799 const TIntV& SortByIndices,
TBool Asc =
true);
802 const TIntV& SortByIndices,
TBool Asc =
true);
822 for (
int i = 0; i <
Next.
Len(); i++) {
835 const TStr& KeyCol2,
const TStr& JoinCol2);
934 return new TTable(S, Context);
939 return new TTable(H, Col1, Col2, Context, IsStrKeys);
944 return new TTable(H, Col1, Col2, Context, IsStrKeys);
958 const char& Separator =
'\t',
TBool HasTitleLine =
false);
961 const TIntV& RelevantCols,
const char& Separator =
'\t',
TBool HasTitleLine =
false);
976 void Dump(FILE *OutF=stdout)
const;
981 PTable T =
New(H, Col1, Col2, Context, IsStrKeys);
988 PTable T =
New(H, Col1, Col2, Context, IsStrKeys);
1108 return IntCols[ColIdx][RowIdx];
1112 return FltCols[ColIdx][RowIdx];
1209 #endif // USE_OPENMP
1213 const TStr& NodeAttrName,
const TAttrType& NodeAttrType,
const TStr& PropertyAttrName,
1259 Select(Predicate, SelectedRows,
true);
1262 const TInt& NegativeLabel = 0);
1274 const TStr& LabelName,
const TInt& PositiveLabel = 1,
const TInt& NegativeLabel = 0);
1293 const TStr& LabelName,
const TInt& PositiveLabel = 1,
const TInt& NegativeLabel = 0) {
1297 ClassifyAux(SelectedRows, LabelName, PositiveLabel, NegativeLabel);
1338 const TStr& ResAttr,
TBool Ordered =
true);
1352 return Join(Col1, *Table, Col2);
1394 const TStr& FKeyAttr,
const TStr& ReadAttr,
TFlt DefaultFltVal = 0.0);
1397 const TStr& FKeyAttr,
const TStr& ReadAttr,
TFlt DefaultFltVal = 0.0);
1400 #endif // GCC_ATOMIC
1431 #endif // USE_OPENMP
1454 TBool AddToFirstTable=
true);
1457 TBool AddToFirstTable=
true);
1460 TBool AddToFirstTable=
true);
1463 TBool AddToFirstTable=
true);
1466 TBool AddToFirstTable=
true);
1472 #endif // USE_OPENMP
1490 TBool AddToFirstTable=
true);
1511 const double& C = 0.85,
const double& Eps = 1e-4,
const int& MaxIter = 100) {
1541 if (!NodeVals.
IsKey(FNodeVal)) {
1542 TInt NodeVal = NodeVals.
Len();
1543 Graph->AddNode(NodeVal);
1544 NodeVals.
AddKey(FNodeVal);
1545 NodeVals.
AddDat(FNodeVal, NodeVal);
1547 }
else {
return NodeVals.
GetDat(FNodeVal); }
1555 for (
TInt i = 1; i < V.
Len(); i++) {
1556 if (V[i] < Res) { Res = V[i]; }
1562 for (
TInt i = 1; i < V.
Len(); i++) {
1563 if (V[i] > Res) { Res = V[i]; }
1571 return V[V.
Len()-1];
1575 for (
TInt i = 1; i < V.
Len(); i++) {
1582 for (
TInt i = 1; i < V.
Len(); i++) {
1590 return V[V.
Len()/2];
1600 T ShouldNotComeHere;
1601 return ShouldNotComeHere;
1608 if(!UsePhysicalIds && IdColIdx < 0){
1609 TExcept::Throw(
"Grouping: Either use physical row ids, or have an id column");
1616 TInt idx = UsePhysicalIds ? it.GetRowIdx() : it.GetIntAttr(IdColIdx);
1617 UpdateGrouping<TInt>(Grouping, it.GetIntAttr(GroupBy), idx);
1621 for (
TInt i = 0; i < IndexSet.
Len(); i++) {
1623 TInt RowIdx = IndexSet[i];
1625 TInt idx = UsePhysicalIds ? RowIdx :
IntCols[IdColIdx][RowIdx];
1626 UpdateGrouping<TInt>(Grouping, Col[RowIdx], idx);
1636 if(!UsePhysicalIds && IdColIdx < 0){
1637 TExcept::Throw(
"Grouping: Either use physical row ids, or have an id column");
1643 TInt idx = UsePhysicalIds ? it.GetRowIdx() : it.GetIntAttr(IdColIdx);
1644 UpdateGrouping<TFlt>(Grouping, it.GetFltAttr(GroupBy), idx);
1648 for (
TInt i = 0; i < IndexSet.
Len(); i++) {
1650 TInt RowIdx = IndexSet[i];
1652 TInt idx = UsePhysicalIds ? RowIdx :
IntCols[IdColIdx][RowIdx];
1653 UpdateGrouping<TFlt>(Grouping, Col[RowIdx], idx);
1663 if(!UsePhysicalIds && IdColIdx < 0){
1664 TExcept::Throw(
"Grouping: Either use physical row ids, or have an id column");
1670 TInt idx = UsePhysicalIds ? it.GetRowIdx() : it.GetIntAttr(IdColIdx);
1671 UpdateGrouping<TInt>(Grouping, it.GetStrMapByName(GroupBy), idx);
1675 for (
TInt i = 0; i < IndexSet.
Len(); i++) {
1677 TInt RowIdx = IndexSet[i];
1679 TInt idx = UsePhysicalIds ? RowIdx :
IntCols[IdColIdx][RowIdx];
1680 UpdateGrouping<TInt>(Grouping,
StrColMaps[ColIdx][RowIdx], idx);
1688 if (Grouping.
IsKey(Key)) {
1693 Grouping.
AddDat(Key, NewGroup);
1700 if (Grouping.
IsKey(Key)) {
1707 Grouping.
AddDat(Key, NewGroup);
1710 #endif // GCC_ATOMIC
1728 template <
class PGraph>
1730 TTableContext* Context,
const double& C,
const double& Eps,
const int& MaxIter) {
1731 int NumGraphs = GraphSeq.
Len();
1732 TableSeq.
Reserve(NumGraphs, NumGraphs);
1734 for (
TInt i = 0; i < NumGraphs; i++){
1736 GetPageRank(GraphSeq[i], PRankH, C, Eps, MaxIter);
1742 template <
class PGraph>
1745 int NumGraphs = GraphSeq.
Len();
1746 TableSeq.
Reserve(NumGraphs, NumGraphs);
1748 for (
TInt i = 0; i < NumGraphs; i++){
1751 GetHits(GraphSeq[i], HubH, AuthH, MaxIter);
1754 PTable HitsT = HubT->Join(
"NodeId", AuthT,
"NodeId");
1755 HitsT->Rename(
"1.NodeId",
"NodeId");
1756 HitsT->Rename(
"1.Hub",
"Hub");
1757 HitsT->Rename(
"2.Authority",
"Authority");
1762 HitsT->ProjectInPlace(V);
1763 TableSeq[i] = HitsT;
void UpdateGrouping(THash< T, TIntV > &Grouping, T Key, TInt Val) const
Template for utility function to update a grouping hash map.
TStr GetDstCol() const
Gets the name of the column to be used as dst nodes in the graph.
TSize GetMemUsedKB()
Returns approximate memory used by table in [KB].
void ThresholdJoinInputCorrectness(const TStr &KeyCol1, const TStr &JoinCol1, const TTable &Table, const TStr &KeyCol2, const TStr &JoinCol2)
void AddSchemaCol(const TStr &ColName, TAttrType ColType)
Adds column with name ColName and type ColType to the schema.
TFlt GetFltAttr(TInt ColIdx) const
Returns value of floating point attribute specified by float column index for current row...
TInt RequestIndexInt(const TStr &ColName)
Creates Index for Int Column ColName.
TBool IsLastGraphOfSequence()
Checks if the end of the graph sequence is reached.
TBool IsAttr(const TStr &Attr)
Checks if Attr is an attribute of this table schema.
void SetFltVal(TStr VarName, TFlt VarVal)
Set flt variable value in the predicate or all the children that use it.
void Order(const TStrV &OrderBy, TStr OrderColName="", TBool ResetRankByMSC=false, TBool Asc=true)
Orders the rows according to the values in columns of OrderBy (in descending lexicographic order)...
TInt GetNumRows() const
Gets total number of rows in this table.
void FillBucketsByInterval(TStr SplitAttr, TIntPrV SplitIntervals)
Fills RowIdBuckets with sets of row ids.
void RemoveRow(TInt RowIdx, TInt PrevRowIdx)
Removes row with id RowIdx.
void SelectAtomicConst(const TStr &Col, const T &Val, TPredComp Cmp, PTable &SelectedTable)
void AddInt(const TInt &Val)
Adds int attribute to this row.
TStrV EdgeAttrV
List of columns (attributes) to serve as edge attributes.
THash< GroupStmt, THash< TGroupKey, TIntV > > GroupMapping
Maps grouping statements to their (group-by key –> group id) mapping.
TInt FirstValidRow
Physical index of first valid row.
int GetPrimHashCd() const
Returns primary hash code of the vector. Used by THash.
TStr DenormalizeColName(const TStr &ColName) const
Removes suffix from column name if it exists.
TInt GetPivot(TIntV &V, TInt StartIdx, TInt EndIdx, const TVec< TAttrType > &SortByTypes, const TIntV &SortByIndices, TBool Asc)
Gets pivot element for QSort.
TInt GetColIdx(const TStr &ColName) const
Gets index of column ColName among columns of the same type in the schema.
enum TAttrType_ TAttrType
Types for tables, sparse and dense attributes.
static PTable New(TTableContext *Context)
void StoreGroupCol(const TStr &GroupColName, const TVec< TPair< TInt, TInt > > &GroupAndRowIds)
Parallel helper function for grouping. - we currently don't support such parallel grouping by complex...
static const TInt Last
Special value for Next vector entry - last row in table.
PTable UnionAll(const TTable &Table)
Returns union of this table with given Table, preserving duplicates.
TStrV GetStrVals() const
Gets string attributes of this row.
static TInt PartitionKeyVal(TIntV &Key, TIntV &Val, TInt Start, TInt End)
Primitive class: Wrapper around primitive data types.
bool operator==(const TRowIterator &RowI) const
Checks if this iterator points to the same row pointed by RowI.
TStrV GetSrcNodeIntAttrV() const
Gets src node int attribute name vector.
PTable Minus(const PTable &Table)
void PrintGrouping(const THash< TGroupKey, TIntV > &Grouping) const
void SelectFirstNRows(const TInt &N)
Selects first N rows from the table.
TStrV GetDstNodeStrAttrV() const
Gets dst node str attribute name vector.
void GetPartitionRanges(TIntPrV &Partitions, TInt NumPartitions) const
Partitions the table into NumPartitions and populate Partitions with the ranges.
TInt GetIntAttr(TInt ColIdx) const
Returns value of integer attribute specified by integer column index for current row.
TPredComp
Comparison operators for selection predicates.
TStr GetStr(const TInt &KeyId) const
Returns a string with KeyId.
TPredicateNode(TPredOp Opr)
Constructor for logical operation predicate node (internal node)
void Defrag()
Releases memory of deleted rows, and defrags.
PGraphMP ToGraphMP(PTable Table, const TStr &SrcCol, const TStr &DstCol)
Performs table to graph conversion in parallel using the sort-first algorithm. This is the recommende...
PNEANet ToVarGraphSequenceIterator(TStr SplitAttr, TAttrAggr AggrPolicy, TIntPrV SplitIntervals)
Creates the graph sequence one at a time.
void SaveBin(const TStr &OutFNm)
Saves table schema and content to a binary file.
TStr GetStrAttr(TInt ColIdx) const
Returns value of string attribute specified by string column index for current row.
void AddIntCol(const TStr &ColName)
Adds an integer column with name ColName.
THash< TStr, TPair< TAttrType, TInt > > ColTypeMap
TStr Rvar
Right variable of the comparison op.
void SetDstCol(const TStr &Dst)
Sets the name of the column to be used as dst nodes in the graph.
TInt GetLastValidRowIdx()
Gets the id of the last valid row of the table.
static PTable New(const THash< TInt, TInt > &H, const TStr &Col1, const TStr &Col2, TTableContext *Context, const TBool IsStrKeys=false)
Returns pointer to a table constructed from given int->int hash.
void ThresholdJoinCountCollisions(const TTable &TB, const TTable &TS, const TIntIntVH &T, TInt JoinColIdxB, TInt KeyColIdxB, TInt KeyColIdxS, THash< TIntPr, TIntTr > &Counters, TBool ThisIsSmaller, TAttrType JoinColType, TAttrType KeyType)
void AddGraphAttributeV(TStrV &Attrs, TBool IsEdge, TBool IsSrc, TBool IsDst)
Adds vector of names of columns to be used as graph attributes.
void GroupByIntColMP(const TStr &GroupBy, THashMP< TInt, TIntV > &Grouping, TBool UsePhysicalIds=true) const
Groups/hashes by a single column with integer values, using OpenMP multi-threading.
void SetFltColToConstMP(TInt UpdateColIdx, TFlt DefaultFltVal)
void GetPageRank(const PGraph &Graph, TIntFltH &PRankH, const double &C=0.85, const double &Eps=1e-4, const int &MaxIter=100)
void ThresholdJoinCountPerJoinKeyCollisions(const TTable &TB, const TTable &TS, const TIntIntVH &T, TInt JoinColIdxB, TInt KeyColIdxB, TInt KeyColIdxS, THash< TIntTr, TIntTr > &Counters, TBool ThisIsSmaller, TAttrType JoinColType, TAttrType KeyType)
void ColAdd(const TStr &Attr1, const TStr &Attr2, const TStr &ResultAttrName="")
Performs columnwise addition. See TTable::ColGenericOp.
TArithOp
Possible column-wise arithmetic operations.
TInt RequestIndexStrMap(const TStr &ColName)
Creates Index for Str Column ColName.
TFlt GetNextFltAttr(TInt ColIdx) const
Returns value of float attribute specified by float column index for next row.
TSizeTy Len() const
Returns the number of elements in the vector.
void AddSelectedRows(const TTable &Table, const TIntV &RowIDs)
Adds rows from Table that correspond to ids in RowIDs.
void SetSrcCol(const TStr &Src)
Sets the name of the column to be used as src nodes in the graph.
TRowIteratorWithRemove(const TRowIteratorWithRemove &RowI)
Copy constructor.
TStr IdColName
A mapping from column name to column type and column index among columns of the same type...
Predicate - encapsulates comparison operations.
TBool CompareAtomicConstTStr(TInt ColIdx, const TStr &Val, TPredComp Cmp)
Compares value in column ColIdx with given TStr Val.
PTable SelfSimJoinPerGroup(const TStr &GroupAttr, const TStr &SimCol, const TStr &DistanceColName, const TSimType &SimType, const TFlt &Threshold)
Performs join if the distance between two rows is less than the specified threshold.
static TStrV NormalizeColNameV(const TStrV &Cols)
Adds suffix to column name if it doesn't exist.
static TInt CompareKeyVal(const TInt &K1, const TInt &V1, const TInt &K2, const TInt &V2)
const char * GetContextKey(TInt Val) const
Gets the Key of the Context StringVals pool. Used by ToGraph method in conv.cpp.
void Save(TSOut &SOut)
Saves TTableContext in binary to SOut.
int GetSecHashCd() const
Returns secondary hash code of the vector. Used by THash.
THash< TStr, THash< TInt, TIntV > > StrMapColIndexes
Indexes for String Columns.
THash< TStr, THash< TInt, TIntV > > IntColIndexes
Indexes for Int Columns.
void ColConcat(const TStr &Attr1, const TStr &Attr2, const TStr &Sep="", const TStr &ResAttr="")
Concatenates two string columns.
TStrV GetSrcNodeStrAttrV() const
Gets src node str attribute name vector.
void AddNodeAttr(const TStr &Attr)
Handles the common case where src and dst both belong to the same "universe" of entities.
TTableContext * Context
Execution Context.
void AddRow(const TTableRow &Row)
Adds row with values taken from given TTableRow.
TSimType
Distance metrics for similarity joins.
TBool Start
A flag indicating whether the current row is the first valid row of the table.
void QSort(TIntV &V, TInt StartIdx, TInt EndIdx, const TVec< TAttrType > &SortByTypes, const TIntV &SortByIndices, TBool Asc=true)
Performs QSort on given vector V.
TAttrType Type
Type of the predicate variables.
TPredicateNode * Left
Left child of this node.
THash< TStr, TInt > IntVars
Int variables in the current predicate tree.
void InvalidateAffectedGroupings(const TStr &Attr)
void Dump(FILE *OutF=stdout) const
Prints table contents to a text file.
TInt LastValidRow
Physical index of last valid row.
void UnionAllInPlace(const PTable &Table)
TPredicate(TPredicateNode *R)
Construct predicate with given root node R.
void Group(const TStrV &GroupBy, const TStr &GroupColName, TBool Ordered=true, TBool UsePhysicalIds=true)
Groups rows depending on values of GroupBy columns.
void SelectAtomicStrConst(const TStr &Col, const TStr &Val, TPredComp Cmp, PTable &SelectedTable)
void ResizeTable(int RowCount)
Resizes the table to hold RowCount rows.
Iterator over a vector of tables.
bool HasNext()
Checks if iterator has reached end of the sequence.
TPredicate()
Default constructor.
TPrimitive(const TPrimitive &Prim)
TTableContext()
Default constructor.
TAttrAggr
Possible policies for aggregating node attributes.
void ColDiv(const TStr &Attr1, const TStr &Attr2, const TStr &ResultAttrName="")
Performs columnwise division. See TTable::ColGenericOp.
void Rename(const TStr &Column, const TStr &NewLabel)
Renames a column.
void GroupAux(const TStrV &GroupBy, THash< TGroupKey, TPair< TInt, TIntV > > &Grouping, TBool Ordered, const TStr &GroupColName, TBool KeepUnique, TIntV &UniqueVec, TBool UsePhysicalIds=true)
Helper function for grouping.
TStrV GetEdgeFltAttrV() const
Gets edge float attribute name vector.
TStr GetNextStrAttr(TInt ColIdx) const
Returns value of string attribute specified by string column index for next row.
const TDat & GetDat(const TKey &Key) const
static PTable New(const PTable Table)
Returns pointer to a new table created from given Table.
void AddRightChild(TPredicateNode *Child)
Add right child to this node.
static PTable TableFromHashMap(const THash< TInt, TInt > &H, const TStr &Col1, const TStr &Col2, TTableContext *Context, const TBool IsStrKeys=false)
Builds table from hash table of int->int.
Schema GetSchema()
Gets the schema of this table.
TFltV GetFltVals() const
Gets float attributes of this row.
TVec< TIntV > RowIdBuckets
Partitioning of row ids into buckets corresponding to different graph objects when generating a seque...
TRowIteratorWithRemove BegRIWR()
Gets iterator with remove to the first valid row.
TInt GetNumValidRows() const
Gets number of valid, i.e. not deleted, rows in this table.
TStr GetStr(const TInt &KeyId) const
Gets the string with KeyId.
TRowIterator BegRI() const
Gets iterator to the first valid row of the table.
TPredicateNode()
Default constructor.
PNEANet ToGraphPerGroupIterator(TStr GroupAttr, TAttrAggr AggrPolicy)
Creates the graph sequence one at a time.
TVec< TIntV > IntCols
Next[i] is the successor of row i. Table iterators follow the order dictated by Next ...
void SelectAtomicConst(const TStr &Col, const T &Val, TPredComp Cmp)
Iterator class for TTable rows, that allows logical row removal while iterating.
TSizeTy GetMemUsed() const
Returns the memory footprint (the number of bytes) of the vector.
void CheckAndAddIntNode(PNEANet Graph, THashSet< TInt > &NodeVals, TInt NodeId)
Checks if given NodeId is seen earlier; if not, add it to Graph and hashmap NodeVals.
TVec< PNEANet > ToGraphSequence(TStr SplitAttr, TAttrAggr AggrPolicy, TInt WindowSize, TInt JumpSize, TInt StartVal=TInt::Mn, TInt EndVal=TInt::Mx)
Creates a sequence of graphs based on values of column SplitAttr and windows specified by JumpSize an...
void GroupByFltCol(const TStr &GroupBy, T &Grouping, const TIntV &IndexSet, TBool All, TBool UsePhysicalIds=true) const
Groups/hashes by a single column with float values. Returns hash table with grouping.
TInt GetStrMapByName(const TStr &Col) const
Returns integer mapping of string attribute specified by attribute name for current row...
PTable Minus(TTable &Table)
Returns table with rows that are present in this table but not in given Table.
static PTable GetNodeTable(const PNEANet &Network, TTableContext *Context)
Extracts node TTable from PNEANet.
THash< TStr, TStr > StrVars
String variables in the current predicate tree.
TIntV GetStrRowIdxByMap(const TStr &ColName, const TInt &Map) const
Gets the rows containing int mapping Map in str column ColName.
TIntV GetIntVals() const
Gets int attributes of this row.
TStr GetIdColName() const
Gets name of the id column of this table.
static TBool EvalStrAtom(const TStr &Val1, const TStr &Val2, TPredComp Cmp)
Compare atomic string values Val1 and Val2 using predicate Cmp.
TTable(const TTable &Table)
Copy constructor.
TRowIteratorWithRemove()
Default constructor.
static void LoadSSSeq(PTable &NewTable, const Schema &S, const TStr &InFNm, const TIntV &RelevantCols, const char &Separator, TBool HasTitleLine)
Sequentially loads data from input file at InFNm into NewTable.
void AddEdgeAttr(const TStr &Attr)
Adds column to be used as graph edge attribute.
TRowIterator(const TRowIterator &RowI)
Copy constructor.
void IncrementNext()
Increments the next vector and sets last, NumRows and NumValidRows.
PTable SimJoin(const TStrV &Cols1, const TTable &Table, const TStrV &Cols2, const TStr &DistanceColName, const TSimType &SimType, const TFlt &Threshold)
Performs join if the distance between two rows is less than the specified threshold.
void InitIds()
Adds explicit row ids, initializes hash set mapping ids to physical rows.
TStrTrV CommonNodeAttrs
List of attribute pairs with values common to source and destination and their common given name...
void QSortPar(TIntV &V, const TVec< TAttrType > &SortByTypes, const TIntV &SortByIndices, TBool Asc=true)
Performs QSort in parallel on given vector V.
void Save(TSOut &SOut)
Saves table schema and content to a binary format.
PTable Join(const TStr &Col1, const PTable &Table, const TStr &Col2)
void MapHits(const TVec< PGraph > &GraphSeq, TVec< PTable > &TableSeq, TTableContext *Context, const int &MaxIter)
Gets sequence of Hits tables from given GraphSeq into TableSeq.
TBool Result
Result of evaluating the predicate rooted at this node.
void ReadFltCol(const TStr &ColName, TFltV &Result) const
Reads values of entire float column into Result.
void InvalidatePhysicalGroupings()
TBool operator==(const GroupStmt &stmt) const
TPair< TIntV, TFltV > TGroupKey
Represents grouping key with IntV for integer and string attributes and FltV for float attributes...
Iterator class for TTable rows.
TInt GetNextRowIdx() const
Gets physical index of next row.
void DelKey(const TKey &Key)
void Aggregate(const TStrV &GroupByAttrs, TAttrAggr AggOp, const TStr &ValAttr, const TStr &ResAttr, TBool Ordered=true)
Aggregates values of ValAttr after grouping with respect to GroupByAttrs. Result are stored as new at...
TAttrType GetSchemaColType(TInt Idx) const
Gets type of the column with index Idx in the schema.
PGraph ToGraph(PTable Table, const TStr &SrcCol, const TStr &DstCol, TAttrAggr AggrPolicy)
Sequentially converts the table into a graph with links from nodes in SrcCol to those in DstCol...
PTable Intersection(const PTable &Table)
void SetIntVal(TStr VarName, TInt VarVal)
Set int variable value in the predicate or all the children that use it.
TStrV GetEdgeIntAttrV() const
Gets edge int attribute name vector.
TRowIteratorWithRemove(TInt RowIdx, TTable *TablePtr, TBool IsStart)
Constructs iterator pointing to given row.
void SetStrVal(TStr VarName, TStr VarVal)
Set str variable value in the predicate or all the children that use it.
void RemoveNext()
Removes next row.
int GetPrimHashCd() const
TStr StrConst
Str const value if this object is a string constant.
TVec< PNEANet > ToGraphPerGroup(TStr GroupAttr, TAttrAggr AggrPolicy)
Creates a sequence of graphs based on grouping specified by GroupAttr.
void AddColType(const TStr &ColName, TAttrType ColType, TInt Index)
Adds column with name ColName and type ColType to the ColTypeMap.
const TTable * Table
Reference to table containing this row.
int LoadCrossNet(TCrossNet &Graph, PTable Table, const TStr &SrcCol, const TStr &DstCol, TStrV &EdgeAttrV)
Loads the edges from the TTable and EdgeAttrV specifies columns containing edge attributes.
void Sort(const bool &Asc=true)
Sorts the elements of the vector.
void SelectAtomicIntConst(const TStr &Col, const TInt &Val, TPredComp Cmp)
static void Throw(const TStr &MsgStr)
Schema DenormalizeSchema() const
Removes suffix from column names in the Schema.
void AddDstNodeAttr(const TStr &Attr)
Adds column to be used as dst node attribute of the graph.
PNEANet NextGraphIterator()
Calls to this must be preceded by a call to one of the above ToGraph*Iterator functions.
friend class TRowIterator
TStr GetSrcCol() const
Gets the name of the column to be used as src nodes in the graph.
PNEANet BuildGraph(const TIntV &RowIds, TAttrAggr AggrPolicy)
Makes a single pass over the rows in the given row id set, and creates nodes, edges, assigns node and edge attributes.
PGraphMP ToNetworkMP(PTable Table, const TStr &SrcCol, const TStr &DstCol, TStrV &SrcAttrV, TStrV &DstAttrV, TStrV &EdgeAttrV, TAttrAggr AggrPolicy)
Does Table to Network conversion in parallel using the sort-first algorithm. This is the recommended ...
TBool EvalAtomicPredicate(const TAtomicPredicate &Atom)
Evaluates the given atomic predicate.
void ColSub(const TStr &Attr1, const TStr &Attr2, const TStr &ResultAttrName="")
Performs columnwise subtraction. See TTable::ColGenericOp.
TFlt GetFltValAtRowIdx(const TInt &ColIdx, const TInt &RowIdx)
Get the float value at column ColIdx and row RowIdx.
int GetEmptyRowsStart(int NewRows)
Gets the start index to a chunk of empty rows of size NewRows.
THash< TStr, THash< TFlt, TIntV > > FltColIndexes
Indexes for Float Columns.
TStr Lvar
Left variable of the comparison op.
const char * GetKey(const int &KeyId) const
void ProjectInPlace(const TStrV &ProjectCols)
Keeps only the columns specified in ProjectCols.
TAtomicPredicate(TAttrType Typ, TBool IsCnst, TPredComp Cmp, TStr L, TStr R)
Compact prototype for constructing non-const atomic predicate.
void SelectAtomicStrConst(const TStr &Col, const TStr &Val, TPredComp Cmp)
TBool CompareAtomicConst(TInt ColIdx, const TPrimitive &Val, TPredComp Cmp)
Compares value in column ColIdx with given primitive Val.
TRowIteratorWithRemove EndRIWR()
Gets iterator with remove to the last valid row.
TFltV FltVals
Values of the flt columns for this row.
TInt GetStrMapById(TInt ColIdx, TInt RowIdx) const
Gets the integer mapping of the string at column ColIdx at row RowIdx.
void SelectAtomic(const TStr &Col1, const TStr &Col2, TPredComp Cmp)
void Reindex()
Reinitializes row ids.
TInt CurrBucket
Current row id bucket - used when generating a sequence of graphs using an iterator.
PTable IsNextK(const TStr &OrderCol, TInt K, const TStr &GroupBy, const TStr &RankColName="")
Distance based filter.
TAttrType GetColType(const TStr &ColName) const
Gets type of column ColName.
TVec< TIntV > StrColMaps
Data columns of integer mappings of string attributes.
TRowIteratorWithRemove & Next()
Increments the iterator (For Python compatibility).
TPredicate(const TPredicate &Pred)
Copy constructor.
PNEANet ToGraphSequenceIterator(TStr SplitAttr, TAttrAggr AggrPolicy, TInt WindowSize, TInt JumpSize, TInt StartVal=TInt::Mn, TInt EndVal=TInt::Mx)
Creates the graph sequence one at a time.
TPrimitive(const TFlt &Val)
PTable SelfJoin(const TStr &Col)
Joins table with itself, on values of Col.
void GroupByIntCol(const TStr &GroupBy, T &Grouping, const TIntV &IndexSet, TBool All, TBool UsePhysicalIds=true) const
Groups/hashes by a single column with integer values.
PTable Join(const TStr &Col1, const TTable &Table, const TStr &Col2)
Performs equijoin.
static int GetHashCd(const int hc1, const int hc2)
void Save(TSOut &SOut, bool PoolToo=true) const
bool IsKey(const TKey &Key) const
static void LoadSSPar(PTable &NewTable, const Schema &S, const TStr &InFNm, const TIntV &RelevantCols, const char &Separator, TBool HasTitleLine)
Loads data in parallel from input file at InFNm into NewTable. Only works when NewTable has no string c...
int LoadMode(TModeNet &Graph, PTable Table, const TStr &NCol, TStrV &NodeAttrV)
Loads the nodes specified in column NCol from the TTable with the attributes specified in NodeAttrV...
TPair< TStr, TAttrType > TStrTypPr
TIntV GetIntRowIdxByVal(const TStr &ColName, const TInt &Val) const
Gets the rows containing Val in int column ColName.
TInt GetRowIdx() const
Gets the id of the row pointed by this iterator.
A class representing a cached grouping statement identifier.
TStr GetSchemaColName(TInt Idx) const
Gets name of the column with index Idx in the schema.
TInt GetStrMapById(TInt ColIdx) const
Returns integer mapping of a string attribute value specified by string column index for current row...
TStrV SrcNodeAttrV
List of columns (attributes) to serve as source node attributes.
TAttrAggr AggrPolicy
Aggregation policy used for solving conflicts between different values of an attribute of the same no...
static void QSortKeyVal(TIntV &Key, TIntV &Val, TInt Start, TInt End)
void Select(TPredicate &Predicate, TIntV &SelectedRows, TBool Remove=true)
Selects rows that satisfy given Predicate.
PTable UnionAll(const PTable &Table)
void UnionAllInPlace(const TTable &Table)
Same as TTable::ConcatTable.
char GetCh(const int &ChN) const
TIntIntH RowIdMap
Mapping of permanent row ids to physical id.
void SaveSS(const TStr &OutFNm)
Saves table schema and content to a TSV file.
PTable Union(const TTable &Table)
Returns union of this table with given Table.
void SelectAtomicConst(const TStr &Col, const TPrimitive &Val, TPredComp Cmp, TIntV &SelectedRows, PTable &SelectedTable, TBool Remove=true, TBool Table=true)
Selects rows where the value of Col matches given primitive Val.
void UpdateFltFromTable(const TStr &KeyAttr, const TStr &UpdateAttr, const TTable &Table, const TStr &FKeyAttr, const TStr &ReadAttr, TFlt DefaultFltVal=0.0)
void ColConcatConst(const TStr &Attr1, const TStr &Val, const TStr &Sep="", const TStr &ResAttr="")
Concatenates column values with given string value.
void GetCollidingRows(const TTable &T, THashSet< TInt > &Collisions)
Gets set of row ids of rows common with table T.
void AddGraphAttribute(const TStr &Attr, TBool IsEdge, TBool IsSrc, TBool IsDst)
Adds names of columns to be used as graph attributes.
void KeepSortedRows(const TIntV &KeepV)
Removes all rows that are not mentioned in the SORTED vector KeepV.
The nodes of one particular mode in a TMMNet, and their neighbor vectors as TIntV attributes...
TPair< TAttrType, TInt > GetColTypeMap(const TStr &ColName) const
Gets column type and index of ColName.
TTableRow()
Default constructor.
TAttrType GetType() const
void GroupingSanityCheck(const TStr &GroupBy, const TAttrType &AttrType) const
Checks if grouping key exists and matches given attr type.
TStrHash< TInt, TBigStrPool > StringVals
StringPool - stores string data values and maps them to integers.
static PTable TableFromHashMap(const THash< TInt, TFlt > &H, const TStr &Col1, const TStr &Col2, TTableContext *Context, const TBool IsStrKeys=false)
Builds table from hash table of int->float.
void UpdateTableForNewRow()
Updates table state after adding one or more rows.
void SetCommonNodeAttrs(const TStr &SrcAttr, const TStr &DstAttr, const TStr &CommonAttrName)
Sets the columns to be used as both src and dst node attributes.
int AddKey(const char *Key)
void GetHits(const PGraph &Graph, TIntFltH &NIdHubH, TIntFltH &NIdAuthH, const int &MaxIter=20)
static TInt UseMP
Global switch for choosing multi-threaded versions of TTable functions.
TRowIterator()
Default constructor.
TPredComp Compare
Comparison op represented by this node.
TTableIterator(TVec< PTable > &PTableV)
Constructs an iterator over the given vector of tables PTableV.
static TTableIterator GetMapHitsIterator(const TVec< PNEANet > &GraphSeq, TTableContext *Context, const int &MaxIter=20)
Gets sequence of Hits tables from given GraphSeq.
void DelColType(const TStr &ColName)
Removes column with name ColName from the ColTypeMap.
void ReadIntCol(const TStr &ColName, TIntV &Result) const
Reads values of entire int column into Result.
int GetPrimHashCd() const
void FillBucketsByWindow(TStr SplitAttr, TInt JumpSize, TInt WindowSize, TInt StartVal, TInt EndVal)
Fills RowIdBuckets with sets of row ids.
static TStr NormalizeColName(const TStr &ColName)
Adds suffix to column name if it doesn't exist.
void AddStrCol(const TStr &ColName)
Adds a string column with name ColName.
THash< TStr, GroupStmt > GroupStmtNames
Maps user-given grouping statement names to their group-by attributes.
TTableContext(TSIn &SIn)
Loads TTableContext in binary from SIn.
TRowIterator & Next()
Increments the iterator (For Python compatibility).
static PTable Load(TSIn &SIn, TTableContext *Context)
Loads table from a binary format.
TStr SrcCol
Column (attribute) to serve as src nodes when constructing the graph.
PTable Project(const TStrV &ProjectCols)
Returns table with only the columns in ProjectCols.
TVec< PTable > PTableV
Vector of TTables which are to be iterated over.
void StoreStrCol(const TStr &ColName, const TStrV &ColVals)
Adds entire str column to table.
TVec< TFltV > FltCols
Data columns of floating point attributes.
void AddSrcNodeAttr(TStrV &Attrs)
Adds columns to be used as src node attributes of the graph.
TStrV GetDstNodeFltAttrV() const
Gets dst node float attribute name vector.
TStrV DstNodeAttrV
List of columns (attributes) to serve as destination node attributes.
TIntV Next
A vector describing the logical order of the rows.
void AddStr(const TStr &Val)
Adds string attribute to this row.
TPredicateNode(const TAtomicPredicate &A)
Constructor for atomic predicate node (leaf)
TAtomicPredicate(TAttrType Typ, TBool IsCnst, TPredComp Cmp, TStr L, TStr R, TInt ICnst, TFlt FCnst, TStr SCnst)
Construct predicate from given comparison op, variables and constants.
void SelectAtomicFltConst(const TStr &Col, const TFlt &Val, TPredComp Cmp, PTable &SelectedTable)
int AddKey(const TKey &Key)
TRowIterator EndRI() const
Gets iterator to the last valid row of the table.
void AddStrVal(const TInt &ColIdx, const TStr &Val)
Adds Val in column with id ColIdx.
TTable * Table
Reference to table containing this row.
PGraphMP ToGraphMP3(PTable Table, const TStr &SrcCol, const TStr &DstCol)
Performs table to graph conversion in parallel. Uses the hash-first method, which is less optimal...
void AddRow(const TRowIterator &RI)
Adds row corresponding to RI.
void Load(TSIn &SIn, bool PoolToo=true)
TInt NumRows
Number of rows in the table (valid and invalid).
TFlt GetFltVal(const TStr &ColName, const TInt &RowIdx)
Gets the value of float attribute ColName at row RowIdx.
static TTableIterator GetMapPageRank(const TVec< PNEANet > &GraphSeq, TTableContext *Context, const double &C=0.85, const double &Eps=1e-4, const int &MaxIter=100)
Gets sequence of PageRank tables from given GraphSeq.
static PTable LoadSS(const Schema &S, const TStr &InFNm, TTableContext *Context, const char &Separator= '\t', TBool HasTitleLine=false)
Loads table from spreadsheet (TSV, CSV, etc.). Note: HasTitleLine = true is not supported. Please comment out title lines instead.
TPrimitive(const TInt &Val)
TStr GetStrVal(const TStr &ColName, const TInt &RowIdx) const
Gets the value of string attribute ColName at row RowIdx.
void Unique(const TStr &Col)
Removes rows with duplicate values in given column.
TRowIteratorWithRemove & operator++(int)
Increments the iterator.
void AddJointRow(const TTable &T1, const TTable &T2, TInt RowIdx1, TInt RowIdx2)
Adds joint row T1[RowIdx1]<=>T2[RowIdx2].
void Classify(TPredicate &Predicate, const TStr &LabelName, const TInt &PositiveLabel=1, const TInt &NegativeLabel=0)
void Merge(TIntV &V, TInt Idx1, TInt Idx2, TInt Idx3, const TVec< TAttrType > &SortByTypes, const TIntV &SortByIndices, TBool Asc=true)
Helper function for parallel QSort.
TStr DstCol
Column (attribute) to serve as dst nodes when constructing the graph.
void AddSrcNodeAttr(const TStr &Attr)
Adds column to be used as src node attribute of the graph.
void MapPageRank(const TVec< PGraph > &GraphSeq, TVec< PTable > &TableSeq, TTableContext *Context, const double &C, const double &Eps, const int &MaxIter)
Gets sequence of PageRank tables from given GraphSeq into TableSeq.
void ReadStrCol(const TStr &ColName, TStrV &Result) const
Reads values of entire string column into Result.
TStr GetStrVal(TInt ColIdx, TInt RowIdx) const
Gets the value in column with id ColIdx at row RowIdx.
static void SetMP(TInt Value)
static PTable GetEdgeTable(const PNEANet &Network, TTableContext *Context)
Extracts edge TTable from PNEANet.
GroupStmt(const GroupStmt &stmt)
static const TInt Invalid
Special value for Next vector entry - logically removed row.
TStrV StrVals
Values of the str columns for this row.
void AddColType(const TStr &ColName, TPair< TAttrType, TInt > ColType)
Adds column with name ColName and type ColType to the ColTypeMap.
PNEANet GetNextGraphFromSequence()
Returns the next graph in sequence corresponding to RowIdBuckets.
TBool IncludesAttr(const TStr &Attr)
TBool CompareAtomicConst(TInt ColIdx, const TPrimitive &Val, TPredComp Cmp)
Compares value in column ColIdx with given primitive Val.
void StoreFltCol(const TStr &ColName, const TFltV &ColVals)
Adds entire flt column to table.
THash< GroupStmt, THash< TInt, TGroupKey > > GroupIDMapping
Maps grouping statements to their (group id –> group-by key) mapping.
TInt IntConst
Int const value if this object is an integer constant.
void AddFlt(const TFlt &Val)
Adds float attribute to this row.
TTriple< TStr, TStr, TStr > TStrTr
GroupStmt(const TStrV &Attrs, TBool ordered, TBool physical)
TPredOp Op
Logical op represented by this node.
TInt CurrTableIdx
Index of the current table pointed to by this iterator.
void GroupByStrCol(const TStr &GroupBy, T &Grouping, const TIntV &IndexSet, TBool All, TBool UsePhysicalIds=true) const
Groups/hashes by a single column with string values. Returns hash table with grouping.
T AggregateVector(TVec< T > &V, TAttrAggr Policy)
Aggregates vector into a single scalar value according to a policy.
TTableContext * ChangeContext(TTableContext *Context)
Changes the current context. Moves all object items to the new context.
TInt CurrRowIdx
Physical row index of current row pointed by iterator.
void AddNodeAttr(TStrV &Attrs)
Handles the common case where src and dst both belong to the same "universe" of entities.
TPredicateNode * Root
Root node of the current predicate tree.
void AggregateCols(const TStrV &AggrAttrs, TAttrAggr AggOp, const TStr &ResAttr)
Aggregates attributes in AggrAttrs across columns.
bool operator==(const TRowIteratorWithRemove &RowI) const
Checks if this iterator points to the same row pointed by RowI.
Table class: Relational table with columnar data storage.
bool operator<(const TRowIterator &RowI) const
Checks if this iterator points to a row that is before the one pointed by RowI.
TPredicateNode(const TPredicateNode &P)
Copy constructor.
TStr GetStrValById(TInt ColIdx, TInt RowIdx) const
Gets the value of the string attribute at column ColIdx at row RowIdx.
void SelectAtomicFltConst(const TStr &Col, const TFlt &Val, TPredComp Cmp)
void UpdateFltFromTableMP(const TStr &KeyAttr, const TStr &UpdateAttr, const TTable &Table, const TStr &FKeyAttr, const TStr &ReadAttr, TFlt DefaultFltVal=0.0)
static PTable GetEdgeTablePN(const PNGraphMP &Network, TTableContext *Context)
Extracts edge TTable from parallel graph PNGraphMP.
void ISort(TIntV &V, TInt StartIdx, TInt EndIdx, const TVec< TAttrType > &SortByTypes, const TIntV &SortByIndices, TBool Asc=true)
Performs insertion sort on given vector V.
TInt GetRowIdx() const
Gets physical index of current row.
TPredOp
Boolean operators for selection predicates.
TInt RequestIndexFlt(const TStr &ColName)
Creates Index for Flt Column ColName.
static TBool EvalAtom(T Val1, T Val2, TPredComp Cmp)
Compare atomic values Val1 and Val2 using predicate Cmp.
static PTable New(const THash< TInt, TFlt > &H, const TStr &Col1, const TStr &Col2, TTableContext *Context, const TBool IsStrKeys=false)
Returns pointer to a table constructed from given int->float hash.
bool operator<(const TRowIteratorWithRemove &RowI) const
Checks if this iterator points to a row that is before the one pointed by RowI.
static PTable New(const Schema &S, TTableContext *Context)
void SelectAtomicIntConst(const TStr &Col, const TInt &Val, TPredComp Cmp, PTable &SelectedTable)
void InitRowIdBuckets(int NumBuckets)
Initializes the RowIdBuckets vector which will be used for the graph sequence creation.
void AddLeftChild(TPredicateNode *Child)
Add left child to this node.
TStrV GetSrcNodeFltAttrV() const
Gets src node float attribute name vector.
static PTable GetFltNodePropertyTable(const PNEANet &Network, const TIntFltH &Property, const TStr &NodeAttrName, const TAttrType &NodeAttrType, const TStr &PropertyAttrName, TTableContext *Context)
Extracts node and edge property TTables from THash.
void ConcatTable(const PTable &T)
Appends all rows of T to this table, and recalculates indices.
Hash-Table with multiprocessing support.
PTable ThresholdJoinPerJoinKeyOutputTable(const THash< TIntTr, TIntTr > &Counters, TInt Threshold, const TTable &Table)
TPrimitive(const TStr &Val)
PTable ThresholdJoin(const TStr &KeyCol1, const TStr &JoinCol1, const TTable &Table, const TStr &KeyCol2, const TStr &JoinCol2, TInt Threshold, TBool PerJoinKey=false)
void Load(TSIn &SIn)
Loads TTableContext in binary from SIn.
static void ISortKeyVal(TIntV &Key, TIntV &Val, TInt Start, TInt End)
TBool IsConst
Flag if this atomic node represents a constant value.
TInt CurrRowIdx
Physical row index of current row pointed by iterator.
static TInt GetPivotKeyVal(TIntV &Key, TIntV &Val, TInt Start, TInt End)
TIntV IntVals
Values of the int columns for this row.
TInt IsNextDirty
Flag to signify whether the rows are stored in logical sequence or reordered. Used for optimizing Get...
void Select(TPredicate &Predicate)
TStrV GetEdgeStrAttrV() const
Gets edge str attribute name vector.
friend class TRowIteratorWithRemove
void AddFltCol(const TStr &ColName)
Adds a float column with name ColName.
TInt CompareRows(TInt R1, TInt R2, const TAttrType &CompareByType, const TInt &CompareByIndex, TBool Asc=true)
Returns positive value if R1 is bigger, negative value if R2 is bigger, and 0 if they are equal (strc...
TStr RenumberColName(const TStr &ColName) const
Returns a re-numbered column name based on number of existing columns with conflicting names...
TAtomicPredicate()
Default constructor.
TInt NumValidRows
Number of valid rows in the table (i.e. rows that were not logically removed).
PTable ThresholdJoinOutputTable(const THash< TIntPr, TIntTr > &Counters, TInt Threshold, const TTable &Table)
TRowIterator(TInt RowIdx, const TTable *TablePtr)
Constructs iterator to row RowIdx of TablePtr.
void Count(const TStr &CountColName, const TStr &Col)
Counts number of unique elements.
PTable InitializeJointTable(const TTable &Table)
Initializes an empty table for the join of this table with the given table.
void ColMax(const TStr &Attr1, const TStr &Attr2, const TStr &ResultAttrName="")
Performs max of two columns. See TTable::ColGenericOp.
TStr GetStrValByName(const TStr &ColName, const TInt &RowIdx) const
Gets the value of the string attribute at column ColName at row RowIdx.
void Reserve(const TSizeTy &_MxVals)
Reserves enough memory for the vector to store _MxVals elements.
void ClassifyAtomic(const TStr &Col1, const TStr &Col2, TPredComp Cmp, const TStr &LabelName, const TInt &PositiveLabel=1, const TInt &NegativeLabel=0)
bool Cmp(const int &RelOp, const TRec &Rec1, const TRec &Rec2)
void StoreIntCol(const TStr &ColName, const TIntV &ColVals)
Adds entire int column to table.
void AddIdColumn(const TStr &IdColName)
Adds a column of explicit integer identifiers to the rows.
void GetVariables(TStrV &Variables)
Get variables in the predicate tree rooted at this node.
static TInt CheckSortedKeyVal(TIntV &Key, TIntV &Val, TInt Start, TInt End)
void AddEdgeAttributes(PNEANet &Graph, int RowId)
Adds attributes of edge corresponding to RowId to the Graph.
TVec< PNEANet > ToVarGraphSequence(TStr SplitAttr, TAttrAggr AggrPolicy, TIntPrV SplitIntervals)
Creates a sequence of graphs based on values of column SplitAttr and intervals specified by SplitInte...
PTable Next()
Returns next table in the sequence and updates the iterator.
TInt GetNextIntAttr(TInt ColIdx) const
Returns value of integer attribute specified by integer column index for next row.
void ColGenericOp(const TStr &Attr1, const TStr &Attr2, const TStr &ResAttr, TArithOp op)
Performs columnwise arithmetic operation.
void SelectAtomic(const TStr &Col1, const TStr &Col2, TPredComp Cmp, TIntV &SelectedRows, TBool Remove=true)
Selects rows using atomic compare operation.
TRowIterator & operator++(int)
Increments the iterator.
bool IsKey(const TKey &Key) const
void GetVariables(TStrV &Variables)
Get variables in current predicate.
TSizeTy Add()
Adds a new element at the end of the vector, after its current last element.
TDat & AddDat(const TKey &Key)
PGraph ToNetwork(PTable Table, const TStr &SrcCol, const TStr &DstCol, TStrV &SrcAttrV, TStrV &DstAttrV, TStrV &EdgeAttrV, TAttrAggr AggrPolicy)
Converts table to a network. Suitable for PNEANet - Requires node and edge attribute column names as ...
void ColMin(const TStr &Attr1, const TStr &Attr2, const TStr &ResultAttrName="")
Performs min of two columns. See TTable::ColGenericOp.
bool IsRowValid(TInt RowIdx) const
Checks if RowIdx corresponds to a valid (i.e. not deleted) row.
void ColMod(const TStr &Attr1, const TStr &Attr2, const TStr &ResultAttrName="")
Performs columnwise modulus. See TTable::ColGenericOp.
void RemoveFirstRow()
Removes first valid row of the table.
bool IsStrIn(const TStr &Str) const
TBool IsFirst() const
Checks whether iterator points to first valid row of the table.
Atomic predicate - encapsulates comparison operations.
TInt GetStrMapByName(const TStr &ColName, TInt RowIdx) const
Gets the integer mapping of the string at column ColName at row RowIdx.
TBool IsColName(const TStr &ColName) const
TInt GetIntValAtRowIdx(const TInt &ColIdx, const TInt &RowIdx)
Get the integer value at column ColIdx and row RowIdx.
TInt CheckAndAddFltNode(T Graph, THash< TFlt, TInt > &NodeVals, TFlt FNodeVal)
Checks if given NodeVal is seen earlier; if not, add it to Graph and hashmap NodeVals.
Predicate node - represents a binary predicate operation on two predicate nodes.
PTable SelfSimJoin(const TStrV &Cols, const TStr &DistanceColName, const TSimType &SimType, const TFlt &Threshold)
void AddNodeAttributes(TInt NId, TStrV NodeAttrV, TInt RowId, THash< TInt, TStrIntVH > &NodeIntAttrs, THash< TInt, TStrFltVH > &NodeFltAttrs, THash< TInt, TStrStrVH > &NodeStrAttrs)
Takes as parameters, and updates, maps NodeXAttrs: Node Id –> (attribute name –> Vector of attribut...
GroupStmt(const TStrV &Attrs)
PNEANet GetFirstGraphFromSequence(TAttrAggr AggrPolicy)
Returns the first graph of the sequence.
TDat & AddDat(const TKey &Key)
void ClassifyAtomicConst(const TStr &Col, const T &Val, TPredComp Cmp, const TStr &LabelName, const TInt &PositiveLabel=1, const TInt &NegativeLabel=0)
PTable Intersection(const TTable &Table)
Returns intersection of this table with given Table.
void AddDstNodeAttr(TStrV &Attrs)
Adds columns to be used as dst node attributes of the graph.
void AddNJointRowsMP(const TTable &T1, const TTable &T2, const TVec< TIntPrV > &JointRowIDSet)
Adds rows from T1 and T2 to this table in a parallel manner. Used by Join.
const TDat & GetDat(const TKey &Key) const
TTableContext * GetContext()
Returns the context.
TFlt FltConst
Flt const value if this object is a float constant.
TBool Eval()
Return the result of evaluating current predicate.
TIntV GetFltRowIdxByVal(const TStr &ColName, const TFlt &Val) const
Gets the rows containing Val in flt column ColName.
TSize GetContextMemUsedKB()
Returns approximate memory used by table context in [KB].
TInt AddStr(const TStr &Key)
Adds string Key to the context, returns its KeyId.
TPredicateNode * Parent
Parent node of this node.
TInt GetIntVal(const TStr &ColName, const TInt &RowIdx)
Gets the value of integer attribute ColName at row RowIdx.
THash< TInt, TInt > GetRowIdMap() const
Gets a map of logical to physical row ids.
void SetFirstValidRow()
Sets the first valid row of the TTable.
void AddTable(const TTable &T)
Adds all the rows of the input table. Allows duplicate rows (not a union).
void ColMul(const TStr &Attr1, const TStr &Attr2, const TStr &ResultAttrName="")
Performs columnwise multiplication. See TTable::ColGenericOp.
void ClassifyAux(const TIntV &SelectedRows, const TStr &LabelName, const TInt &PositiveLabel=1, const TInt &NegativeLabel=0)
Adds a label attribute with positive labels on selected rows and negative labels on the rest...
THash< TStr, TFlt > FltVars
Float variables in the current predicate tree.
void AddNRows(int NewRows, const TVec< TIntV > &IntColsP, const TVec< TFltV > &FltColsP, const TVec< TIntV > &StrColMapsP)
Adds NewRows rows from the given vectors for each column type.
TVec< PTable > SpliceByGroup(const TStrV &GroupByAttrs, TBool Ordered=true)
Splices table into subtables according to a grouping statement.
PGraphMP ToNetworkMP2(PTable Table, const TStr &SrcCol, const TStr &DstCol, TStrV &SrcAttrV, TStrV &DstAttrV, TStrV &EdgeAttrV, TAttrAggr AggrPolicy)
Implements table to network conversion in parallel. Not the recommended algorithm; use ToNetworkMP instead.
void AddEdgeAttr(TStrV &Attrs)
Adds columns to be used as graph edge attributes.
TVec< TPair< TStr, TAttrType > > Schema
A table schema is a vector of pairs <attribute name, attribute type>.
void ColGenericOpMP(TInt ArgColIdx1, TInt ArgColIdx2, TAttrType ArgType1, TAttrType ArgType2, TInt ResColIdx, TArithOp op)
Vector is a sequence of TVal objects representing an array that can change in size.
TVec< PNEANet > GetGraphsFromSequence(TAttrAggr AggrPolicy)
Returns a sequence of graphs.
TStrV GetDstNodeIntAttrV() const
Gets dst node int attribute name vector.
PTable Union(const PTable &Table)
TAtomicPredicate Atom
Atomic predicate at this node.
TInt Partition(TIntV &V, TInt StartIdx, TInt EndIdx, const TVec< TAttrType > &SortByTypes, const TIntV &SortByIndices, TBool Asc)
Partitions vector for QSort.
Implements a single CrossNet consisting of edges between two TModeNets (could be the same TModeNet) ...