17 while (!(Curr == NULL && Prev ==
Root)) {
19 if (Prev == NULL || Prev == Curr->
Parent) {
21 if (Curr->
Left != NULL) {
24 }
else if (Curr->
Right != NULL) {
32 }
else if (Prev == Curr->
Left) {
142 if (
CurrRowIdx == TTable::Last) {
return false; }
143 if (RowI.
CurrRowIdx == TTable::Last) {
return true; }
203 Result =
TBool(
false);
216 CurrRowIdx(RowIdx), Table(TablePtr), Start(RowIdx == TablePtr->FirstValidRow) {}
230 if (
CurrRowIdx == TTable::Last) {
return false; }
231 if (RowI.
CurrRowIdx == TTable::Last) {
return true; }
295 Result =
TBool(
false);
303 FirstValidRow(0), LastValidRow(-1) {}
306 NumValidRows(0), FirstValidRow(0), LastValidRow(-1) {}
309 NumRows(0), NumValidRows(0), FirstValidRow(0), LastValidRow(-1), IsNextDirty(0) {
313 for (
TInt i = 0; i < TableSchema.
Len(); i++) {
314 TStr ColName = TableSchema[i].Val1;
338 NumValidRows(SIn), FirstValidRow(SIn), LastValidRow(SIn), Next(SIn), IntCols(SIn),
339 FltCols(SIn), StrColMaps(SIn) {
367 NumValidRows(H.Len()), FirstValidRow(0), LastValidRow(H.Len()-1) {
394 NumRows(H.Len()), NumValidRows(H.Len()), FirstValidRow(0), LastValidRow(H.Len()-1) {
419 Sch(Table.Sch), SrcCol(Table.SrcCol), DstCol(Table.DstCol), EdgeAttrV(Table.EdgeAttrV),
420 SrcNodeAttrV(Table.SrcNodeAttrV), DstNodeAttrV(Table.DstNodeAttrV),
421 CommonNodeAttrs(Table.CommonNodeAttrs) {
438 TSsParser Ss(InFNm,
'\t',
false,
false,
false);
439 TInt rowsToPeek = 1000;
441 TInt lastComment = 0;
453 for (
TInt i = 0; i < numCols; i++) {
456 else if (Ss.
IsFlt(i)) {
464 if (currRow > rowsToPeek || Ss.
Eof())
break;
468 TSsParser SsNames(InFNm, Separator,
false,
false,
false);
469 for (
int i = 0; i < lastComment; i++) { SsNames.
Next();}
471 TStr first(SsNames[0]);
474 if (first != comment) {
475 for (
int i = 1; i < first.
Len(); i++){
476 if (first[i] !=
' ') { begin = i;
break;}
480 for (
int i = 1; i < SsNames.
GetFlds(); i++) {attrV.
Add(SsNames[i]);}
481 for (
TInt i = 0; i < numCols; i++) {
488 const char& Separator,
TBool HasTitleLine) {
490 TInt RowLen = T->Sch.Len();
492 for (
TInt i = 0; i < RowLen; i++) {
493 ColTypes[i] = T->GetSchemaColType(i);
508 TInt L = strlen(Ss[i]);
509 if (Ss[i][L-1] <
' ') { Ss[i][L-1] = 0; }
520 int NumThreads = omp_get_max_threads();
522 uint64 Delta = Rem / NumThreads;
523 if (Delta < 1) Delta = 1;
530 for (
int i = 1; i < NumThreads; i++) {
531 StartIntV[i] = StartIntV[i-1] + Delta;
536 omp_set_num_threads(NumThreads);
537 #pragma omp parallel for schedule(dynamic) reduction(+:Cnt)
538 for (
int i = 0; i < NumThreads; i++) {
540 Cnt += LineCountV[i];
545 for (
int i = 1; i < NumThreads; i++) {
546 PrefixSumV[i] = PrefixSumV[i-1] + LineCountV[i-1];
553 for (
TInt i = 0; i < RowLen; i++) {
554 switch (ColTypes[i]) {
556 T->IntCols[IntColIdx].Gen(Cnt);
560 T->FltCols[FltColIdx].Gen(Cnt);
569 omp_set_num_threads(NumThreads);
570 #pragma omp parallel for schedule(dynamic) reduction(+:Cnt)
571 for (
int i = 0; i < NumThreads; i++) {
579 if (FieldsV.
Len() != S.
Len()) {
584 TInt RowIdx = PrefixSumV[i] + k;
586 for (
TInt j = 0; j < RowLen; j++) {
587 switch (ColTypes[j]) {
589 if (RelevantCols.
Len() == 0) {
590 T->IntCols[IntColIdx][RowIdx] = \
593 T->IntCols[IntColIdx][RowIdx] = \
599 if (RelevantCols.
Len() == 0) {
600 T->FltCols[FltColIdx][RowIdx] = \
603 T->FltCols[FltColIdx][RowIdx] = \
619 T->NumValidRows = T->NumRows;
624 omp_set_num_threads(NumThreads);
625 #pragma omp parallel for schedule(dynamic, 10000)
626 for (
int64 i = 0; i < Cnt-1; i++) {
630 T->Next[Cnt-1] =
Last;
631 T->LastValidRow = T->NumRows - 1;
633 T->IdColName =
"_id";
634 TInt IdCol = T->IntCols.Add();
635 T->IntCols[IdCol].Gen(Cnt);
638 omp_set_num_threads(NumThreads);
639 #pragma omp parallel for schedule(dynamic, 10000)
640 for (
int64 i = 0; i < Cnt; i++) {
641 T->IntCols[IdCol][i] = i;
644 T->AddSchemaCol(T->IdColName,
atInt);
645 T->AddColType(T->IdColName,
atInt, T->IntCols.Len()-1);
651 const char& Separator,
TBool HasTitleLine) {
653 int RowLen = T->Sch.Len();
655 for (
int i = 0; i < RowLen; i++) {
656 ColTypes[i] = T->GetSchemaColType(i);
668 for (
int i = 0; i < Ss.
GetFlds(); i++) {
670 int L = strlen(Ss[i]);
671 if (Ss[i][L-1] <
' ') { Ss[i][L-1] = 0; }
687 for (
int i = 0; i < RowLen; i++) {
688 switch (ColTypes[i]) {
690 if (RelevantCols.
Len() == 0) {
691 T->IntCols[IntColIdx].Add(Ss.
GetInt(i));
693 T->IntCols[IntColIdx].Add(Ss.
GetInt(RelevantCols[i]));
698 if (RelevantCols.
Len() == 0) {
699 T->FltCols[FltColIdx].Add(Ss.
GetFlt(i));
701 T->FltCols[FltColIdx].Add(Ss.
GetFlt(RelevantCols[i]));
707 if (RelevantCols.
Len() == 0) {
710 ColIdx = RelevantCols[i];
713 T->AddStrVal(StrColIdx, Sval);
722 T->NumRows =
static_cast<int>(Cnt);
723 T->NumValidRows = T->NumRows;
726 T->Next.Gen(static_cast<int>(Cnt));
727 for (
uint64 i = 0; i < Cnt-1; i++) {
728 T->Next[
static_cast<int>(i)] = static_cast<int>(i+1);
731 T->Next[
static_cast<int>(Cnt-1)] =
Last;
732 T->LastValidRow = T->NumRows - 1;
738 const TIntV& RelevantCols,
const char& Separator,
TBool HasTitleLine) {
740 bool NoStringCols =
true;
744 if (RelevantCols.
Len() == 0) {
747 for (
int i = 0; i < RelevantCols.
Len(); i++) {
748 SR.
Add(S[RelevantCols[i]]);
754 for (
int i = 0; i < SR.
Len(); i++) {
755 if (T->GetSchemaColType(i) ==
atStr) {
756 NoStringCols =
false;
761 if (
GetMP() && NoStringCols) {
765 LoadSSPar(T, S, InFNm, RelevantCols, Separator, HasTitleLine);
767 LoadSSSeq(T, S, InFNm, RelevantCols, Separator, HasTitleLine);
770 LoadSSSeq(T, S, InFNm, RelevantCols, Separator, HasTitleLine);
776 const char& Separator,
TBool HasTitleLine) {
777 return LoadSS(S, InFNm, Context,
TIntV(), Separator, HasTitleLine);
782 printf(
"Table is empty");
785 FILE* F = fopen(OutFNm.
CStr(),
"w");
788 printf(
"failed to open file %s\n", OutFNm.
CStr());
801 for (
TInt i = 0; i < L-1; i++) {
802 fprintf(F,
"%s\t", DSch[i].Val1.CStr());
804 fprintf(F,
"%s\n", DSch[L-1].Val1.CStr());
807 for (
TInt i = 0; i < L; i++) {
808 char C = (i == L-1) ?
'\n' :
'\t';
863 ColTypeIntMap.
Save(SOut);
875 for (
TInt i = 0; i < L-1; i++) {
876 fprintf(OutF,
"%s\t", DSch[i].Val1.CStr());
878 fprintf(OutF,
"%s\n", DSch[L-1].Val1.CStr());
881 for (
TInt i = 0; i < L; i++) {
882 char C = (i == L-1) ?
'\n' :
'\t';
906 for (
TInt i = 0; i < L; i++) {
916 TInt RowIdx = RowI.GetRowIdx();
918 printf(
"ChangeContext in %d %d %d .%s.\n",
926 for (
TInt i = 0; i < L; i++) {
936 TInt RowIdx = RowI.GetRowIdx();
973 for (
TInt i = 0; i < Attrs.
Len(); i++) {
978 for (
TInt i = 0; i < Attrs.
Len(); i++) {
1095 if (
Sch[c].Val1 == NColName) {
1120 Assert(RowIdx != TTable::Invalid);
1121 if (RowIdx == TTable::Last) {
return; }
1134 for (
TInt i = 0; i < KeepV.
Len(); i++) {
1141 if (KeepSize < KeepV.
Len()) {
1159 if (
NumValidRows % NumPartitions != 0) PartitionSize++;
1160 if (PartitionSize < 10) {
1164 Partitions.
Reserve(NumPartitions+1);
1167 TInt currStart = currRow;
1169 TInt currCount = PartitionSize;
1170 while (currRow != TTable::Last) {
1171 if (currCount == 0) {
1172 Partitions.
Add(
TIntPr(currStart, currRow));
1173 currStart = currRow;
1174 currCount = PartitionSize;
1176 currRow =
Next[currRow];
1179 Partitions.
Add(
TIntPr(currStart, currRow));
1182 currRow += PartitionSize;
1183 while (currRow != TTable::Last && currRow <
Next.
Len()) {
1184 if (
Next[currRow] == TTable::Invalid) { currRow++;
continue; }
1185 Partitions.
Add(
TIntPr(currStart, currRow));
1186 currStart = currRow;
1187 currRow += PartitionSize;
1189 Partitions.
Add(
TIntPr(currStart, TTable::Last));
1207 gettimeofday(&timer0, NULL);
1208 double t1 = timer0.tv_sec + (timer0.tv_usec/1000000.0);
1212 if(!UsePhysicalIds && IdColIdx < 0){
1213 TExcept::Throw(
"Grouping: Either use physical row ids, or have an id column");
1219 TInt PartitionSize = Partitions[0].GetVal2()-Partitions[0].GetVal1()+1;
1227 #pragma omp parallel for schedule(dynamic, CHUNKS_PER_THREAD) //num_threads(1)
1228 for (
int i = 0; i < Partitions.
Len(); i++){
1231 while (RowI < EndI) {
1234 UpdateGrouping<TInt>(Grouping, RowI.
GetIntAttr(GroupByColIdx), idx);
1238 gettimeofday(&timer0, NULL);
1239 double t2 = timer0.tv_sec + (timer0.tv_usec/1000000.0);
1240 printf(
"Grouping time: %f\n", t2 - t1);
1244 #endif // GCC_ATOMIC
1247 TIntV RemainingRows;
1254 RemainingRows.
Add(it->Dat[0]);
1262 RemainingRows.
Add(it->Dat[0]);
1270 RemainingRows.
Add(it->Dat[0]);
1279 if(Cols.
Len() == 1){
1286 GroupAux(NCols, Grouping, Ordered,
"",
true, UniqueVec,
true);
1296 for (
TInt i = 0; i < GroupAndRowIds.Len(); i++) {
1297 IntCols[L-1][GroupAndRowIds[i].Val2] = GroupAndRowIds[i].Val1;
1305 if(!UsePhysicalIds && IdColIdx < 0){
1306 TExcept::Throw(
"Grouping: Either use physical row ids, or have an id column");
1308 TIntV IntGroupByCols;
1309 TIntV FltGroupByCols;
1310 TIntV StrGroupByCols;
1312 for (
TInt c = 0; c < GroupBy.
Len(); c++) {
1319 switch (ColType.
Val1) {
1321 IntGroupByCols.
Add(ColType.
Val2);
1324 FltGroupByCols.
Add(ColType.
Val2);
1327 StrGroupByCols.
Add(ColType.
Val2);
1332 TInt IKLen = IntGroupByCols.
Len();
1333 TInt FKLen = FltGroupByCols.
Len();
1334 TInt SKLen = StrGroupByCols.
Len();
1342 TIntV IKey(IKLen + SKLen, 0);
1343 TFltV FKey(FKLen, 0);
1344 TIntV SKey(SKLen, 0);
1347 for (
TInt c = 0; c < IKLen; c++) {
1348 IKey.
Add(it.GetIntAttr(IntGroupByCols[c]));
1350 for (
TInt c = 0; c < FKLen; c++) {
1351 FKey.
Add(it.GetFltAttr(FltGroupByCols[c]));
1353 for (
TInt c = 0; c < SKLen; c++) {
1354 SKey.
Add(it.GetStrMapById(StrGroupByCols[c]));
1357 if (IKLen > 0) { IKey.
ISort(0, IKey.
Len()-1,
true); }
1358 if (FKLen > 0) { FKey.
ISort(0, FKey.
Len()-1,
true); }
1359 if (SKLen > 0) { SKey.
ISort(0, SKey.
Len()-1,
true); }
1361 for (
TInt c = 0; c < SKLen; c++) {
1368 TInt RowIdx = it.GetRowIdx();
1369 TInt idx = UsePhysicalIds ? it.GetRowIdx() :
IntCols[IdColIdx][it.GetRowIdx()];
1370 if (!Grouping.IsKey(GroupKey)) {
1373 NewGroup.
Val1 = GroupNum;
1375 Grouping.AddDat(GroupKey, NewGroup);
1376 if (GroupColName !=
"") {
1388 if (GroupColName !=
"") {
1412 if (GroupColName !=
"") {
1554 GroupAux(NGroupBy, Grouping, Ordered, NGroupColName,
false, UniqueVec, UsePhysicalIds);
1568 for (
TInt c = 0; c < GroupByAttrs.
Len(); c++) {
1586 TInt NumOfGroups = 0;
1587 TInt GroupingCase = 0;
1590 GroupStmt Stmt(NGroupByAttrs, Ordered, UsePhysicalIds);
1594 if(NGroupByAttrs.
Len() == 1){
1599 GroupByIntColMP(NGroupByAttrs[0], GroupByIntMapping_MP, UsePhysicalIds);
1602 GroupByIntMPKeys[x] = it.GetKey();
1618 #endif // GCC_ATOMIC
1620 NumOfGroups = GroupByIntMapping.
Len();
1625 NumOfGroups = GroupByFltMapping.
Len();
1630 NumOfGroups = GroupByStrMapping.
Len();
1638 GroupAux(NGroupByAttrs, Mapping_aux, Ordered,
"",
false, UniqueVector, UsePhysicalIds);
1640 Mapping.
AddDat(it.GetKey(), it.GetDat().Val2);
1642 NumOfGroups = Mapping.
Len();
1668 #pragma omp parallel for schedule(dynamic)
1670 for (
int g = 0; g < NumOfGroups; g++) {
1671 TIntV* GroupRows = NULL;
1672 switch(GroupingCase){
1677 GroupRows = & GroupByIntMapping.
GetDat(GroupByIntMapping.
GetKey(g));
1680 GroupRows = & GroupByIntMapping.
GetDat(GroupByIntMapping.
GetKey(g));
1683 GroupRows = & GroupByStrMapping.
GetDat(GroupByStrMapping.
GetKey(g));
1687 GroupRows = & GroupByIntMapping_MP.
GetDat(GroupByIntMPKeys[g]);
1703 TIntV& ValidRows = *GroupRows;
1705 if (sz <= 0)
continue;
1708 for (
TInt i = 0; i < sz; i++) {
IntCols[ColIdx][ValidRows[i]] = sz; }
1713 for (
TInt i = 0; i < sz; i++) { V.
Add(
IntCols[AggrColIdx][ValidRows[i]]); }
1714 TInt Res = AggregateVector<TInt>(V, AggOp);
1715 if (AggOp ==
aaMean) { Res = Res / sz; }
1716 for (
TInt i = 0; i < sz; i++) {
IntCols[ColIdx][ValidRows[i]] = Res; }
1719 for (
TInt i = 0; i < sz; i++) { V.
Add(
FltCols[AggrColIdx][ValidRows[i]]); }
1720 TFlt Res = AggregateVector<TFlt>(V, AggOp);
1721 if (AggOp ==
aaMean) { Res /= sz; }
1722 for (
TInt i = 0; i < sz; i++) {
FltCols[ColIdx][ValidRows[i]] = Res; }
1732 for (
TInt i = 0; i < AggrAttrs.
Len(); i++) {
1734 if (Info[i].Val1 != Info[0].Val1) {
1735 TExcept::Throw(
"AggregateCols: Aggregation attributes must have the same type");
1739 if (Info[0].Val1 ==
atInt) {
1744 TInt RowIdx = RI.GetRowIdx();
1746 for (
TInt i = 0; i < AggrAttrs.
Len(); i++) {
1749 IntCols[ResIdx][RowIdx] = AggregateVector<TInt>(V, AggOp);
1751 }
else if (Info[0].Val1 ==
atFlt) {
1756 TInt RowIdx = RI.GetRowIdx();
1758 for (
TInt i = 0; i < AggrAttrs.
Len(); i++) {
1761 FltCols[ResIdx][RowIdx] = AggregateVector<TFlt>(V, AggOp);
1764 TExcept::Throw(
"AggregateCols: Only Int and Flt aggregation supported right now");
1773 for(
int i = 0; i < ik.
Len(); i++){ printf(
"%d ",ik[i].Val);}
1774 for(
int i = 0; i < fk.
Len(); i++){ printf(
"%f ",fk[i].Val);}
1777 for(
int i = 0; i < v.
Len(); i++){ printf(
"%d ",v[i].Val);}
1784 GroupByAttrs.
Add(CountColName);
1801 GroupAux(NGroupBy, Grouping, Ordered,
"",
false, UniqueVec);
1811 ColInfo.
Add(GroupTable->GetColTypeMap(
Sch[i].Val1));
1813 ColInfo[i].Val2 = -1;
1821 for (
TInt i = 0; i < Rows.
Len(); i++) {
1830 if (ColIdx == -1) {
continue; }
1833 switch (Info.
Val1) {
1835 GroupTable->IntCols[ColIdx].Add(
IntCols[V[c]][RowIdx]);
1838 GroupTable->FltCols[ColIdx].Add(
FltCols[V[c]][RowIdx]);
1841 GroupTable->StrColMaps[ColIdx].Add(
StrColMaps[V[c]][RowIdx]);
1846 if (GroupTable->LastValidRow >= 0) {
1847 GroupTable->Next[GroupTable->LastValidRow] = GroupTable->NumRows;
1849 GroupTable->Next.Add(GroupTable->Last);
1850 GroupTable->LastValidRow = GroupTable->NumRows;
1852 GroupTable->NumRows++;
1853 GroupTable->NumValidRows++;
1855 GroupTable->InitIds();
1856 Result.
Add(GroupTable);
1874 IntCols[IdColIdx][RI.GetRowIdx()] = IdCnt;
1888 IntCols[IdCol][RI.GetRowIdx()] = IdCnt;
1904 TStr CName = JointTable->RenumberColName(ColName);
1906 JointTable->AddColType(CName, TypeMap);
1908 JointTable->AddSchemaCol(CName, ColType);
1913 TStr CName = JointTable->RenumberColName(ColName);
1917 switch (NewDat.
Val1) {
1928 JointTable->AddColType(CName, NewDat);
1929 JointTable->AddSchemaCol(CName, ColType);
1933 JointTable->AddSchemaCol(IdColName,
atInt);
1978 if(Cols1.
Len()!=Cols2.
Len()){
1982 for (
TInt i = 0; i < Cols1.
Len(); i++) {
1996 TExcept::Throw(
"Column type not supported. Only Flt and Int column types are supported.");
2013 for(
TInt i = 0; i < Cols1.
Len(); i++) {
2014 float attrVal1, attrVal2;
2015 attrVal1 =
GetColType(Cols1[i])==
atInt ? (float)RowI.GetIntAttr(Cols1[i]) : (float)RowI.GetFltAttr(Cols1[i]);
2016 attrVal2 = Table.
GetColType(Cols2[i])==
atInt ? (float)RowI2.GetIntAttr(Cols2[i]) : (float)RowI2.GetFltAttr(Cols2[i]);
2017 distance += pow(attrVal1 - attrVal2, 2);
2020 distance = sqrt(distance);
2022 if(distance<=Threshold){
2023 JointTable->AddJointRow(*
this, Table, RowI.GetRowIdx(), RowI2.GetRowIdx());
2024 DistanceV.
Add(distance);
2033 TExcept::Throw(
"Haversine disance expects exactly two attributes - latitude and longitude - in that order.");
2038 float Latitude1 =
GetColType(Cols1[0])==
atInt ? (float)RowI.GetIntAttr(Cols1[0]) : (float)RowI.GetFltAttr(Cols1[0]);
2039 float Latitude2 = Table.
GetColType(Cols2[0])==
atInt ? (float)RowI2.GetIntAttr(Cols2[0]) : (float)RowI2.GetFltAttr(Cols2[0]);
2041 float Longitude1 =
GetColType(Cols1[1])==
atInt ? (float)RowI.GetIntAttr(Cols1[1]) : (float)RowI.GetFltAttr(Cols1[1]);
2042 float Longitude2 = Table.
GetColType(Cols2[1])==
atInt ? (float)RowI2.GetIntAttr(Cols2[1]) : (float)RowI2.GetFltAttr(Cols2[1]);
2044 Latitude1 *=
static_cast<float>(M_PI/180.0);
2045 Latitude2 *=
static_cast<float>(M_PI/180.0);
2046 Longitude1 *=
static_cast<float>(M_PI/180.0);
2047 Longitude2 *=
static_cast<float>(M_PI/180.0);
2049 float dlon = Longitude2 - Longitude1;
2050 float dlat = Latitude2 - Latitude1;
2051 float a = pow(sin(dlat/2), 2) + cos(Latitude1)*cos(Latitude2)*pow(sin(dlon/2), 2);
2052 float c = 2*atan2(sqrt(a), sqrt(1-a));
2053 distance = (
static_cast<float>(Radius.
Val))*c;
2055 if(distance<=Threshold){
2056 JointTable->AddJointRow(*
this, Table, RowI.GetRowIdx(), RowI2.GetRowIdx());
2057 DistanceV.
Add(distance);
2069 JointTable->StoreFltCol(DistanceColName, DistanceV);
2070 JointTable->InitIds();
2085 for(
TInt i=0;i<2;i++){
2089 JointTable->AddColType(CName, Group);
2090 JointTable->AddSchemaCol(CName,
atInt);
2094 JointTable->AddColType(DistanceColName, Group);
2095 JointTable->AddSchemaCol(DistanceColName,
atFlt);
2104 TInt GroupId =
IntCols[GroupColIdx][RowI.GetRowIdx()];
2108 if(!TIntHH.
IsKey(GroupId)){
2110 TIntHH.
AddDat(GroupId, TIntH);
2115 TIntH.
AddDat(SimAttrVal, 0);
2126 TInt GroupId1 = it1.GetKey();
2129 int intersectionCount = 0;
2130 TInt GroupId2 = it2.GetKey();
2135 TInt Val = it.GetKey();
2136 if(Vals2H.
IsKey(Val)){
2137 intersectionCount+=1;
2141 int unionCount = Vals1H.
Len() + Vals2H.
Len() - intersectionCount;
2142 float distance = 1.0f - (float)intersectionCount/unionCount;
2145 if(distance<=Threshold){
2146 JointTable->IntCols[0].Add(GroupId1);
2147 JointTable->IntCols[1].Add(GroupId2);
2148 JointTable->FltCols[0].Add(distance);
2149 JointTable->IncrementNext();
2154 JointTable->InitIds();
2161 const TStr& DistanceColName,
const TSimType& SimType,
const TFlt& Threshold) {
2166 for(
TInt i=0; i<GroupBy.
Len(); i++)
2168 ProjectionV.
Add(GroupBy[i]);
2171 ProjectionV.
Add(SimCol);
2174 TStr CName =
"Group";
2177 GroupAux(NGroupBy, Grouping,
false, CName,
false, UniqueVec);
2191 if(!GroupIdH.
IsKey(GroupNum))
2193 TInt RandomRowId = RowIds[0];
2194 GroupIdH.
AddDat(GroupNum, RandomRowId);
2198 for(
TRowIterator RowI = GroupJointTable->BegRI(); RowI < GroupJointTable->EndRI(); RowI++)
2201 TInt GroupId1 = GroupJointTable->IntCols[0][RowI.GetRowIdx()];
2202 TInt GroupId2 = GroupJointTable->IntCols[1][RowI.GetRowIdx()];
2207 JointTable->AddJointRow(*
this, *
this, RowId1, RowId2);
2212 JointTable->StoreFltCol(DistanceColName, GroupJointTable->FltCols[0]);
2214 ProjectionV.
Add(DistanceColName);
2218 for(
TInt i=0; i<GroupBy.
Len(); i++){
2219 for(
TInt j=0; j<JointTable->Sch.Len(); j++)
2221 TStr ColName = JointTable->Sch[j].Val1;
2222 if(ColName.
IsStrIn(GroupBy[i]))
2224 ProjectionV.
Add(ColName);
2229 JointTable->ProjectInPlace(ProjectionV);
2230 JointTable->InitIds();
2256 printf(
"no such column %s\n", Col1.
CStr());
2260 printf(
"no such column %s\n", Col2.
CStr());
2264 printf(
"Trying to Join on columns of different type\n");
2273 const TTable& TS = ThisIsSmaller ? *
this : Table;
2274 const TTable& TB = ThisIsSmaller ? Table : *
this;
2275 TStr ColS = ThisIsSmaller ? Col1 : Col2;
2276 TStr ColB = ThisIsSmaller ? Col2 : Col1;
2293 TInt PartitionSize = Partitions[0].GetVal2()-Partitions[0].GetVal1()+1;
2298 #pragma omp parallel for schedule(dynamic, CHUNKS_PER_THREAD)
2299 for (
int i = 0; i < Partitions.
Len(); i++){
2301 JointRowIDSet[i].
Reserve(PartitionSize);
2304 while (RowI < EndI) {
2308 for(
TInt j = 0; j < Group.
Len(); j++){
2324 JointTable->AddNJointRowsMP(*
this, Table, JointRowIDSet);
2335 TInt PartitionSize = Partitions[0].GetVal2()-Partitions[0].GetVal1()+1;
2338 #pragma omp parallel for schedule(dynamic)
2339 for (
int i = 0; i < Partitions.
Len(); i++){
2340 JointRowIDSet[i].
Reserve(PartitionSize);
2343 while (RowI < EndI) {
2347 for(
TInt j = 0; j < Group.
Len(); j++){
2358 JointTable->AddNJointRowsMP(*
this, Table, JointRowIDSet);
2367 TInt PartitionSize = Partitions[0].GetVal2()-Partitions[0].GetVal1()+1;
2370 #pragma omp parallel for schedule(dynamic)
2371 for (
int i = 0; i < Partitions.
Len(); i++){
2372 JointRowIDSet[i].
Reserve(PartitionSize);
2375 while (RowI < EndI) {
2379 for(
TInt j = 0; j < Group.
Len(); j++){
2390 JointTable->AddNJointRowsMP(*
this, Table, JointRowIDSet);
2395 #endif // GCC_ATOMIC
2401 TInt K = RowI.GetIntAttr(ColBId);
2404 for (
TInt i = 0; i < Group.
Len(); i++) {
2405 if (ThisIsSmaller) {
2406 JointTable->AddJointRow(*
this, Table, Group[i], RowI.GetRowIdx());
2408 JointTable->AddJointRow(*
this, Table, RowI.GetRowIdx(), Group[i]);
2419 TFlt K = RowI.GetFltAttr(ColBId);
2422 for (
TInt i = 0; i < Group.
Len(); i++) {
2423 if (ThisIsSmaller) {
2424 JointTable->AddJointRow(*
this, Table, Group[i], RowI.GetRowIdx());
2426 JointTable->AddJointRow(*
this, Table, RowI.GetRowIdx(), Group[i]);
2437 TInt K = RowI.GetStrMapById(ColBId);
2440 for (
TInt i = 0; i < Group.
Len(); i++) {
2441 if (ThisIsSmaller) {
2442 JointTable->AddJointRow(*
this, Table, Group[i], RowI.GetRowIdx());
2444 JointTable->AddJointRow(*
this, Table, RowI.GetRowIdx(), Group[i]);
2459 const TStr& KeyCol2,
const TStr& JoinCol2){
2461 printf(
"no such column %s\n", KeyCol1.
CStr());
2465 printf(
"no such column %s\n", KeyCol2.
CStr());
2469 printf(
"no such column %s\n", JoinCol1.
CStr());
2473 printf(
"no such column %s\n", JoinCol2.
CStr());
2477 printf(
"Trying to Join on columns of different type\n");
2481 printf(
"Key type mismatch\n");
2493 if(JoinColType ==
atStr){
2494 JVal = RowI.GetStrMapById(JoinColIdxB);
2496 JVal = RowI.GetIntAttr(JoinColIdxB);
2502 if(KeyType ==
atStr){
2503 KeyB = RowI.GetStrMapById(KeyColIdxB);
2505 KeyB = RowI.GetIntAttr(KeyColIdxB);
2509 for(
int i = 0; i < RelevantRows.
Len(); i++){
2512 if(KeyType ==
atStr){
2513 KeyS = TS.
StrColMaps[KeyColIdxS][RelevantRows[i]];
2515 KeyS = TS.
IntCols[KeyColIdxS][RelevantRows[i]];
2519 if(Counters.
IsKey(Keys)){
2527 Counters.
AddDat(Keys,
TIntTr(RelevantRows[i], RowI.GetRowIdx(),1));
2529 Counters.
AddDat(Keys,
TIntTr(RowI.GetRowIdx(), RelevantRows[i],1));
2543 if(JoinColType ==
atStr){
2544 JVal = RowI.GetStrMapById(JoinColIdxB);
2546 JVal = RowI.GetIntAttr(JoinColIdxB);
2552 if(KeyType ==
atStr){
2553 KeyB = RowI.GetStrMapById(KeyColIdxB);
2555 KeyB = RowI.GetIntAttr(KeyColIdxB);
2559 for(
int i = 0; i < RelevantRows.
Len(); i++){
2562 if(KeyType ==
atStr){
2563 KeyS = TS.
StrColMaps[KeyColIdxS][RelevantRows[i]];
2565 KeyS = TS.
IntCols[KeyColIdxS][RelevantRows[i]];
2570 if(Counters.
IsKey(K)){
2578 Counters.
AddDat(K,
TIntTr(RelevantRows[i], RowI.GetRowIdx(),1));
2580 Counters.
AddDat(K,
TIntTr(RowI.GetRowIdx(), RelevantRows[i],1));
2592 TIntTr& Counter = iter.GetDat();
2595 if(Counter.
Val3 >= Threshold){
2596 JointTable->AddJointRow(*
this, Table, Counter.
Val1, Counter.
Val2);
2605 const TIntTr& Counter = iter.GetDat();
2606 const TIntTr& Keys = iter.GetKey();
2608 if(Counter.
Val3 >= Threshold){
2610 if(!Pairs.
IsKey(K)){
2612 JointTable->AddJointRow(*
this, Table, Counter.
Val1, Counter.
Val2);
2635 const TTable& TS = ThisIsSmaller ? *
this : Table;
2636 const TTable& TB = ThisIsSmaller ? Table : *
this;
2637 TStr JoinColS = JoinCol1;
2642 JoinColS = JoinCol2;
2654 printf(
"ThresholdJoin only supports integer or string key attributes\n");
2655 TExcept::Throw(
"ThresholdJoin only supports integer or string key attributes");
2657 if(JoinColType !=
atInt && JoinColType !=
atStr){
2658 printf(
"ThresholdJoin only supports integer or string join attributes\n");
2659 TExcept::Throw(
"ThresholdJoin only supports integer or string join attributes");
2664 if(JoinColType ==
atInt){
2666 }
else if(JoinColType ==
atStr){
2669 TExcept::Throw(
"ThresholdJoin only supports integer or string join attributes");
2734 TInt NumRelevantCols = RelevantCols.
Len();
2737 for (
TInt i = 0; i < NumRelevantCols; i++) {
2739 ColIndices[i] =
GetColIdx(RelevantCols[i]);
2746 for (
TInt i = 0; i < NumRelevantCols; i++) {
2747 switch (ColTypes[i]) {
2759 if (!Predicate.
Eval()) {
2767 for (
TInt i = 0; i < NumRelevantCols; i++) {
2768 switch (ColTypes[i]) {
2770 Predicate.
SetIntVal(RelevantCols[i], RowI.GetIntAttr(RelevantCols[i]));
2773 Predicate.
SetFltVal(RelevantCols[i], RowI.GetFltAttr(RelevantCols[i]));
2776 Predicate.
SetStrVal(RelevantCols[i], RowI.GetStrAttr(RelevantCols[i]));
2780 if (Predicate.
Eval()) { SelectedRows.
Add(RowI.GetRowIdx()); }
2787 Select(Predicate, SelectedRows,
false);
2788 ClassifyAux(SelectedRows, LabelName, PositiveLabel, NegativeLabel);
2841 if (Result) { SelectedRows.
Add(RowI.GetRowIdx()); }
2847 const TStr& LabelName,
const TInt& PositiveLabel,
const TInt& NegativeLabel) {
2850 ClassifyAux(SelectedRows, LabelName, PositiveLabel, NegativeLabel);
2861 TExcept::Throw(
"SelectAtomicConst: coltype does not match const type");
2871 TInt PartitionSize = Partitions[0].GetVal2()-Partitions[0].GetVal1()+1;
2872 int RemoveCount = 0;
2879 #pragma omp parallel for schedule(dynamic, CHUNKS_PER_THREAD) reduction(+:RemoveCount)
2880 for (
int i = 0; i < Partitions.
Len(); i++){
2887 while (RowI < EndI) {
2890 if (Type !=
atStr) {
2900 if (First) { FirstRowIdx = CurrRowIdx; First =
false; }
2901 else {
Next[LastRowIdx] = CurrRowIdx; }
2902 LastRowIdx = CurrRowIdx;
2905 Bounds[i] =
TIntPr(FirstRowIdx, LastRowIdx);
2914 while (CurrBound < Bounds.Len() && Bounds[CurrBound].Val1 ==
TTable::Invalid) {
2917 if (CurrBound == Bounds.Len()) {
2927 TInt PrevBound = CurrBound;
2929 while (CurrBound < Bounds.Len()) {
2930 if (Bounds[CurrBound].Val1 == TTable::Invalid) { CurrBound++;
continue; }
2931 Next[Bounds[PrevBound].Val2] = Bounds[CurrBound].Val1;
2933 PrevBound = CurrBound;
2962 TInt PartitionSize = Partitions[0].GetVal2()-Partitions[0].GetVal1()+1;
2966 int TotalSelectedRows = 0;
2967 #pragma omp parallel for schedule(dynamic, CHUNKS_PER_THREAD) reduction(+:TotalSelectedRows)
2968 for (
int i = 0; i < Partitions.
Len(); i++){
2971 while (RowI < EndI) {
2972 if (Type !=
atStr) {
2974 TotalSelectedRows++;
2978 TotalSelectedRows++;
2987 SelectedTable->ResizeTable(TotalSelectedRows);
2991 if (TotalSelectedRows == 0) {
2996 #pragma omp parallel for schedule(dynamic, CHUNKS_PER_THREAD)
2997 for (
int i = 0; i < Partitions.
Len(); i++){
2998 TIntV LocalSelectedRows;
2999 LocalSelectedRows.
Reserve(PartitionSize);
3002 while (RowI < EndI) {
3003 if (Type !=
atStr) {
3014 SelectedTable->AddSelectedRows(*
this, LocalSelectedRows);
3024 SelectedTable->SetFirstValidRow();
3028 if (RowI.CompareAtomicConst(ColIdx, Val, Cmp)) {
3029 SelectedTable->AddRow(RowI);
3037 if (RowI.CompareAtomicConst(ColIdx, Val, Cmp)) {
3038 SelectedRows.
Add(RowI.GetRowIdx());
3046 switch (CompareByType) {
3048 if (
IntCols[CompareByIndex][R1] >
IntCols[CompareByIndex][R2]) {
return (Asc ? 1 : -1); }
3049 if (
IntCols[CompareByIndex][R1] <
IntCols[CompareByIndex][R2]) {
return (Asc ? -1 : 1); }
3053 if (
FltCols[CompareByIndex][R1] >
FltCols[CompareByIndex][R2]) {
return (Asc ? 1 : -1); }
3054 if (
FltCols[CompareByIndex][R1] <
FltCols[CompareByIndex][R2]) {
return (Asc ? -1 : 1); }
3060 int CmpRes = strcmp(S1.
CStr(), S2.
CStr());
3061 return (Asc ? CmpRes : -CmpRes);
3069 for (
TInt i = 0; i < CompareByTypes.
Len(); i++) {
3070 TInt res =
CompareRows(R1, R2, CompareByTypes[i], CompareByIndices[i], Asc);
3071 if (res != 0) {
return res; }
3077 if (StartIdx < EndIdx) {
3078 for (
TInt i = StartIdx+1; i <= EndIdx; i++) {
3081 while ((StartIdx < j) && (
CompareRows(V[j-1], Val, SortByTypes, SortByIndices, Asc) > 0)) {
3091 TInt L = EndIdx - StartIdx + 1;
3095 if (
CompareRows(V[Idx1], V[Idx2], SortByTypes, SortByIndices, Asc) < 0) {
3096 if (
CompareRows(V[Idx2], V[Idx3], SortByTypes, SortByIndices, Asc) < 0) {
return Idx2; }
3097 if (
CompareRows(V[Idx1], V[Idx3], SortByTypes, SortByIndices, Asc) < 0) {
return Idx3; }
3100 if (
CompareRows(V[Idx3], V[Idx2], SortByTypes, SortByIndices, Asc) < 0) {
return Idx2; }
3101 if (
CompareRows(V[Idx3], V[Idx1], SortByTypes, SortByIndices, Asc) < 0) {
return Idx3; }
3110 for (j = StartIdx; j < EndIdx; j++) {
3111 if (
CompareRows(V[j], V[j+1], SortByTypes, SortByIndices, Asc) > 0) {
3119 TInt PivotIdx =
GetPivot(V, StartIdx, EndIdx, SortByTypes, SortByIndices, Asc);
3120 TInt Pivot = V[PivotIdx];
3121 V.
Swap(PivotIdx, EndIdx);
3122 TInt StoreIdx = StartIdx;
3123 for (
TInt i = StartIdx; i < EndIdx; i++) {
3124 if (
CompareRows(V[i], Pivot, SortByTypes, SortByIndices, Asc) <= 0) {
3125 V.
Swap(i, StoreIdx);
3130 V.
Swap(StoreIdx, EndIdx);
3135 if (StartIdx < EndIdx) {
3136 if (EndIdx - StartIdx < 20) {
3137 ISort(V, StartIdx, EndIdx, SortByTypes, SortByIndices, Asc);
3139 TInt Pivot =
Partition(V, StartIdx, EndIdx, SortByTypes, SortByIndices, Asc);
3140 if (Pivot > EndIdx) {
3149 V[Ub], V[Pivot], SortByTypes, SortByIndices, Asc) == 0) {
3152 QSort(V, StartIdx, Ub, SortByTypes, SortByIndices, Asc);
3153 QSort(V, Pivot+1, EndIdx, SortByTypes, SortByIndices, Asc);
3159 TInt i = Idx1, j = Idx2;
3161 while (i < Idx2 && j < Idx3) {
3162 if (
CompareRows(V[i], V[j], SortByTypes, SortByIndices, Asc) <= 0) {
3180 for (
TInt sz = 0; sz < Idx3 - Idx1; sz++) {
3181 V[Idx1 + sz] = SortedV[sz];
3187 TInt NumThreads = 8;
3190 for (
TInt i = 0; i < NumThreads; i++) {
3191 IndV.
Add(i * (Sz / NumThreads));
3195 omp_set_num_threads(NumThreads);
3196 #pragma omp parallel for
3197 for (
int i = 0; i < NumThreads; i++) {
3198 QSort(V, IndV[i], IndV[i+1] - 1, SortByTypes, SortByIndices, Asc);
3201 while (NumThreads > 1) {
3202 omp_set_num_threads(NumThreads / 2);
3203 #pragma omp parallel for
3204 for (
int i = 0; i < NumThreads; i += 2) {
3205 Merge(V, IndV[i], IndV[i+1], IndV[i+2], SortByTypes, SortByIndices, Asc);
3209 for (
TInt i = 0; i < NumThreads; i+=2) {
3215 NumThreads = NumThreads / 2;
3218 #endif // USE_OPENMP
3230 ValidRows[i] = RI.GetRowIdx();
3235 TIntV OrderByIndices(OrderBy.
Len());
3236 for (
TInt i = 0; i < OrderBy.
Len(); i++) {
3238 OrderByIndices[i] =
GetColIdx(OrderBy[i]);
3244 QSortPar(ValidRows, OrderByTypes, OrderByIndices, Asc);
3260 Next[ValidRows[i]] = ValidRows[i+1];
3262 if (NumValidRows > 0) {
3263 Next[ValidRows[NumValidRows-1]] =
Last;
3270 if (!OrderColName.
Empty()) {
3273 RankCol[ValidRows[i]] = i;
3275 if (ResetRankByMSC) {
3277 TStr GroupName = OrderBy[0];
3279 RankCol[ValidRows[i]] = 0;
3281 RankCol[ValidRows[i]] = RankCol[ValidRows[i-1]] + 1;
3298 if (
Next[i] != TTable::Invalid) {
3300 if (FreeIndex == 0) {
3306 Next[FreeIndex] = FreeIndex + 1;
3307 Mapping.
Add(FreeIndex);
3329 Mapping.
Add(TTable::Invalid);
3345 if (!(RowI <
EndRI())) {
3357 TInt CurrId = LastId;
3369 if (!NodeVals.
IsKey(NodeId)) {
3370 Graph->AddNode(NodeId);
3382 Graph->AddIntAttrDatE(RowId,
IntCols[Index][RowId], ColName);
3385 Graph->AddFltAttrDatE(RowId,
FltCols[Index][RowId], ColName);
3388 Graph->AddStrAttrDatE(RowId,
GetStrVal(Index, RowId), ColName);
3396 for (
TInt i = 0; i < NodeAttrV.
Len(); i++) {
3397 TStr ColAttr = NodeAttrV[i];
3408 if (!NodeIntAttrs.
IsKey(NId)) { NodeIntAttrs.
AddKey(NId); }
3411 }
else if (CT ==
atFlt) {
3412 if (!NodeFltAttrs.
IsKey(NId)) { NodeFltAttrs.
AddKey(NId); }
3416 if (!NodeStrAttrs.
IsKey(NId)) { NodeStrAttrs.
AddKey(NId); }
3444 TInt CurrRowIdx = *it;
3448 if (NodeType ==
atFlt) {
3453 }
else if (NodeType ==
atInt || NodeType ==
atStr) {
3454 if (NodeType ==
atInt) {
3455 SVal =
IntCols[SrcColIdx][CurrRowIdx];
3456 DVal =
IntCols[DstColIdx][CurrRowIdx];
3463 if (!Graph->IsNode(SVal)) { Graph->AddNode(SVal); }
3464 if (!Graph->IsNode(DVal)) { Graph->AddNode(DVal); }
3470 Graph->AddEdge(SVal, DVal, CurrRowIdx);
3484 for (
TNEANet::TNodeI NodeI = Graph->BegNI(); NodeI < Graph->EndNI(); NodeI++) {
3485 TInt NId = NodeI.GetId();
3486 if (NodeIntAttrs.
IsKey(NId)) {
3490 Graph->AddIntAttrDatN(NId, AttrVal, it.GetKey());
3493 if (NodeFltAttrs.
IsKey(NId)) {
3497 Graph->AddFltAttrDatN(NId, AttrVal, it.GetKey());
3500 if (NodeStrAttrs.
IsKey(NId)) {
3504 Graph->AddStrAttrDatN(NId, AttrVal, it.GetKey());
3522 for (
TInt i = 0; i < NumBuckets; i++) {
3528 Assert (JumpSize <= WindowSize);
3529 int NumBuckets, MinBucket, MaxBucket;
3538 if (MinValue >
IntCols[SplitColId][i]) {
3539 MinValue =
IntCols[SplitColId][i];
3541 if (MaxValue <
IntCols[SplitColId][i]) {
3542 MaxValue =
IntCols[SplitColId][i];
3547 if (StartVal ==
TInt::Mn) StartVal = MinValue;
3548 if (EndVal ==
TInt::Mx) EndVal = MaxValue;
3552 if (JumpSize == 0) { NumBuckets = (EndVal - StartVal)/JumpSize + 1; }
3553 else { NumBuckets = (EndVal - StartVal)/JumpSize + 1; }
3560 int SplitVal =
IntCols[SplitColId][i];
3561 if (SplitVal < StartVal || SplitVal > EndVal) {
continue; }
3562 int RowVal = SplitVal - StartVal;
3563 if (JumpSize == 0) {
3564 MinBucket = RowVal/WindowSize;
3565 MaxBucket = NumBuckets-1;
3566 }
else if (JumpSize == WindowSize) {
3567 MinBucket = MaxBucket = RowVal/JumpSize;
3569 if (RowVal < WindowSize) { MinBucket = 0; }
3570 else { MinBucket = (RowVal-WindowSize)/JumpSize + 1; }
3571 MaxBucket = RowVal/JumpSize;
3579 int NumBuckets = SplitIntervals.
Len();
3585 int SplitVal =
IntCols[SplitColId][i];
3586 for (
TInt j = 0; j < SplitIntervals.
Len(); j++) {
3587 if (SplitVal >= SplitIntervals[j].Val1 && SplitVal < SplitIntervals[j].Val2) {
3600 GraphSequence.
Add(PNet);
3603 return GraphSequence;
3631 printf(
"buckets filled\n");
3679 for (
TInt i = 0; i < IntAttrNames.
Len(); i++) {
3682 for (
TInt i = 0; i < FltAttrNames.
Len(); i++) {
3685 for (
TInt i = 0; i < StrAttrNames.
Len(); i++) {
3693 while (NodeI < Network->EndNI()) {
3694 T->IntCols[0].Add(NodeI.
GetId());
3695 for (
TInt i = 0; i < IntAttrNames.
Len(); i++) {
3696 T->IntCols[i+1].Add(Network->GetIntAttrDatN(NodeI,IntAttrNames[i]));
3698 for (
TInt i = 0; i < FltAttrNames.
Len(); i++) {
3699 T->FltCols[i].Add(Network->GetFltAttrDatN(NodeI,FltAttrNames[i]));
3701 for (
TInt i = 0; i < StrAttrNames.
Len(); i++) {
3702 T->AddStrVal(i, Network->GetStrAttrDatN(NodeI,StrAttrNames[i]));
3709 T->NumValidRows = T->NumRows;
3710 T->Next =
TIntV(T->NumRows,0);
3711 for (
TInt i = 0; i < T->NumRows-1; i++) {
3714 T->LastValidRow = T->NumRows-1;
3733 for (
TInt i = 0; i < IntAttrNames.
Len(); i++) {
3736 for (
TInt i = 0; i < FltAttrNames.
Len(); i++) {
3739 for (
TInt i = 0; i < StrAttrNames.
Len(); i++) {
3748 while (EdgeI < Network->EndEI()) {
3749 T->IntCols[0].Add(EdgeI.
GetId());
3752 for (
TInt i = 0; i < IntAttrNames.
Len(); i++) {
3753 T->IntCols[i+3].Add(Network->GetIntAttrDatE(EdgeI,IntAttrNames[i]));
3755 for (
TInt i = 0; i < FltAttrNames.
Len(); i++) {
3756 T->FltCols[i].Add(Network->GetFltAttrDatE(EdgeI,FltAttrNames[i]));
3758 for (
TInt i = 0; i < StrAttrNames.
Len(); i++) {
3759 T->AddStrVal(i, Network->GetStrAttrDatE(EdgeI,StrAttrNames[i]));
3766 T->NumValidRows = T->NumRows;
3767 T->Next =
TIntV(T->NumRows,0);
3768 for (
TInt i = 0; i < T->NumRows-1; i++) {
3771 T->LastValidRow = T->NumRows-1;
3784 TInt NumEdges = Network->GetEdges();
3785 TInt NumPartitions = omp_get_max_threads()*CHUNKS_PER_THREAD;
3786 TInt PartitionSize = NumEdges/NumPartitions;
3787 if (PartitionSize*NumPartitions < NumEdges) { NumPartitions++;}
3791 TIntV PartitionSizes;
3794 while (FirstEI < Network->EndEI()){
3795 if (currCount == PartitionSize) {
3796 Partitions.
Add(TEIPr(currStart, FirstEI));
3797 currStart = FirstEI;
3798 PartitionSizes.
Add(currCount);
3806 Partitions.
Add(TEIPr(currStart, FirstEI));
3807 PartitionSizes.
Add(currCount);
3809 T->ResizeTable(NumEdges);
3810 #pragma omp parallel for schedule(dynamic, CHUNKS_PER_THREAD)
3811 for (
int p = 0; p < Partitions.
Len(); p++) {
3815 int start = T->GetEmptyRowsStart(PartitionSizes[p]);
3816 while (EdgeI < EndI) {
3817 T->IntCols[0][start] = EdgeI.
GetSrcNId();
3818 T->IntCols[1][start] = EdgeI.
GetDstNId();
3820 if (EdgeI < EndI) { T->Next[start] = start+1;}
3825 Assert(T->NumRows == NumEdges);
3828 #endif // GCC_ATOMIC
3831 const TStr& NodeAttrName,
const TAttrType& NodeAttrType,
const TStr& PropertyAttrName,
3838 TInt NodeColIdx = T->GetColIdx(NodeAttrName);
3841 for (
TNEANet::TNodeI NodeI = Network->BegNI(); NodeI < Network->EndNI(); NodeI++) {
3842 switch (NodeAttrType) {
3844 T->IntCols[NodeColIdx].Add(Network->GetIntAttrDatN(NodeI,NodeAttrName));
3847 T->FltCols[NodeColIdx].Add(Network->GetFltAttrDatN(NodeI,NodeAttrName));
3850 T->AddStrVal(
TInt(0), Network->GetStrAttrDatN(NodeI,NodeAttrName));
3853 T->FltCols[0].Add(Property.
GetDat(NodeI.GetId()));
3858 T->NumValidRows = T->NumRows;
3859 T->Next =
TIntV(T->NumRows,0);
3860 for (
TInt i = 0; i < T->NumRows-1; i++) {
3863 T->LastValidRow = T->NumRows-1;
3871 if (GroupBy.
Empty()) {
3872 OrderBy.
Add(OrderCol);
3874 OrderBy.
Add(GroupBy);
3875 OrderBy.
Add(OrderCol);
3877 if (RankColName.
Empty()) {
3880 Order(OrderBy, RankColName,
true);
3885 TInt Succ = RI.GetRowIdx();
3886 TBool OutOfGroup =
false;
3887 for (
TInt i = 0; i < K; i++) {
3889 if (Succ ==
Last) {
break; }
3890 switch (GroupByAttrType) {
3892 if (
GetIntVal(GroupBy, Succ) != RI.GetIntAttr(GroupBy)) { OutOfGroup =
true; }
3895 if (
GetFltVal(GroupBy, Succ) != RI.GetFltAttr(GroupBy)) { OutOfGroup =
true; }
3898 if (
GetStrVal(GroupBy, Succ) != RI.GetStrAttr(GroupBy)) { OutOfGroup =
true; }
3901 if (OutOfGroup) {
break; }
3902 T->AddJointRow(*
this, *
this, RI.GetRowIdx(), Succ);
3909 printf(
"Total number of rows: %d\n",
NumRows.
Val);
3911 printf(
"Number of Int columns: %d\n",
IntCols.
Len());
3912 printf(
"Number of Flt columns: %d\n",
FltCols.Len());
3915 printf(
"Approximated size is %lu KB\n", MemUsed);
3919 TSize ApproxSize = 0;
3924 for(
int i = 0; i <
FltCols.Len(); i++){
3925 ApproxSize +=
FltCols[i].GetMemUsed()/1000;
3938 printf(
"Number of strings in pool: ");
3940 printf(
"Number of entries in hash table: ");
3943 printf(
"Approximate memory used for Context: %lu KB\n", MemUsed);
3947 TSize ApproxSize = 0;
3960 if (TColIdx < 0) {
TExcept::Throw(
"when adding a table, it must contain all columns of source table!"); }
3975 for (
TInt i = 0; i < TNext.
Len(); i++) {
3997 TIntV IntGroupByCols;
3998 TIntV FltGroupByCols;
3999 TIntV StrGroupByCols;
4001 TInt IKLen, FKLen, SKLen;
4011 if (
Sch[c] != Table.
Sch[c]) {
4012 printf(
"(%s,%d) != (%s,%d)\n",
Sch[c].Val1.CStr(),
Sch[c].Val2, Table.
Sch[c].Val1.CStr(), Table.
Sch[c].Val2);
4017 switch (ColType.
Val1) {
4019 IntGroupByCols.
Add(ColType.
Val2);
4022 FltGroupByCols.
Add(ColType.
Val2);
4025 StrGroupByCols.
Add(ColType.
Val2);
4030 IKLen = IntGroupByCols.
Len();
4031 FKLen = FltGroupByCols.
Len();
4032 SKLen = StrGroupByCols.
Len();
4035 GroupAux(GroupBy, Grouping,
true,
"",
false, UniqueVec,
true);
4040 TIntV IKey(IKLen + SKLen, 0);
4041 TFltV FKey(FKLen, 0);
4044 for (
TInt c = 0; c < IKLen; c++) {
4045 IKey.
Add(it.GetIntAttr(IntGroupByCols[c]));
4047 for (
TInt c = 0; c < FKLen; c++) {
4048 FKey.
Add(it.GetFltAttr(FltGroupByCols[c]));
4050 for (
TInt c = 0; c < SKLen; c++) {
4051 IKey.
Add(it.GetStrMapById(StrGroupByCols[c]));
4056 TInt RowIdx = it.GetRowIdx();
4057 if (Grouping.
IsKey(GroupKey)) {
4059 Collisions.
AddKey(RowIdx);
4066 printf(
"new column dimension must agree with number of rows\n");
4074 IntCols[ColIdx][RI.GetRowIdx()] = ColVals[i];
4083 printf(
"new column dimension must agree with number of rows\n");
4091 FltCols[ColIdx][RI.GetRowIdx()] = ColVals[i];
4100 printf(
"new column dimension must agree with number of rows\n");
4133 TInt PartitionSize = Partitions[0].GetVal2()-Partitions[0].GetVal1()+1;
4134 #pragma omp parallel for schedule(dynamic, CHUNKS_PER_THREAD)
4135 for (
int i = 0; i < Partitions.
Len(); i++){
4148 return(__sync_bool_compare_and_swap(lock, 0, 1));
4153 TFlt DefaultFltVal) {
4177 TInt PartitionSize = Partitions[0].GetVal2()-Partitions[0].GetVal1()+1;
4187 #pragma omp parallel for schedule(dynamic, CHUNKS_PER_THREAD) // num_threads(1)
4188 for (
int i = 0; i < Partitions.
Len(); i++) {
4191 while (RowI < EndI) {
4193 if (Grouping.
IsKey(K)) {
4195 for (
int j = 0; j < UpdateRows.
Len(); j++) {
4196 int* lock = &Locks[UpdateRows[j]].Val;
4217 #endif // GCC_ATOMIC
4220 const TStr& FKeyAttr,
const TStr& ReadAttr,
TFlt DefaultFltVal){
4231 #endif // GCC_ATOMIC
4246 FltCols[UpdateColIdx][iter.GetRowIdx()] = DefaultFltVal;
4255 TInt K = RI.GetIntAttr(NFKeyAttr);
4256 if (Grouping.
IsKey(K)) {
4258 for (
int i = 0; i < UpdateRows.
Len(); i++) {
4259 FltCols[UpdateColIdx][UpdateRows[i]] = RI.GetFltAttr(NReadAttr);
4295 for (
TInt c = 0; c < IntVals.
Len(); c++) {
4298 for (
TInt c = 0; c < FltVals.
Len(); c++) {
4301 for (
TInt c = 0; c < StrVals.
Len(); c++) {
4308 if (RowCount == 0) {
4319 #pragma omp parallel for schedule(static)
4321 for (
int i = 0; i < TotalCols+1; i++) {
4322 if (i < FltOffset) {
4324 }
else if (i < StrOffset) {
4325 FltCols[i-FltOffset].Reserve(RowCount, RowCount);
4326 }
else if (i < TotalCols) {
4332 }
else if (
Next.
Len() > RowCount) {
4337 #pragma omp parallel for schedule(static)
4339 for (
int i = 0; i < TotalCols+1; i++) {
4340 if (i < FltOffset) {
4342 }
else if (i < StrOffset) {
4343 FltCols[i-FltOffset].Trunc(RowCount);
4344 }
else if (i < TotalCols) {
4356 #pragma omp critical
4377 int NewRows = RowIDs.
Len();
4378 if (NewRows == 0) {
return; }
4381 for (
TInt r = 0; r < NewRows; r++) {
4382 TInt CurrRowIdx = RowIDs[r];
4393 for (
TInt r = 0; r < NewRows-1; r++) {
4394 Next[start+r] = start+r+1;
4399 if (NewRows == 0) {
return; }
4402 for (
TInt r = 0; r < NewRows; r++) {
4403 for (
TInt i = 0; i < IntColsP.
Len(); i++) {
4404 IntCols[i][start+r] = IntColsP[i][r];
4406 for (
TInt i = 0; i < FltColsP.
Len(); i++) {
4407 FltCols[i][start+r] = FltColsP[i][r];
4409 for (
TInt i = 0; i < StrColMapsP.
Len(); i++) {
4413 for (
TInt r = 0; r < NewRows-1; r++) {
4414 Next[start+r] = start+r+1;
4421 int JointTableSize = 0;
4422 TIntV StartOffsets(JointRowIDSet.
Len());
4423 for (
int i = 0; i < JointRowIDSet.
Len(); i++) {
4424 StartOffsets[i] = JointTableSize;
4425 JointTableSize += JointRowIDSet[i].
Len();
4427 if (JointTableSize == 0) {
4445 for (
TInt IdCnt = 0; IdCnt < JointTableSize; IdCnt++) {
4449 #pragma omp parallel for schedule(dynamic, CHUNKS_PER_THREAD)
4450 for (
int j = 0; j < JointRowIDSet.
Len(); j++) {
4451 const TIntPrV& RowIDs = JointRowIDSet[j];
4452 int start = StartOffsets[j];
4453 int NewRows = RowIDs.
Len();
4454 if (NewRows == 0) {
continue;}
4455 for (
TInt r = 0; r < NewRows; r++){
4456 TIntPr CurrRowIdPr = RowIDs[r];
4475 IntCols[IdOffset][start+r] = start+r;
4477 for(
TInt r = 0; r < NewRows; r++){
4478 Next[start+r] = start+r+1;
4486 #endif // USE_OPENMP
4496 result->AddTable(*
this);
4497 result->UnionAllInPlace(Table);
4516 ColNames.
Add(
Sch[c].Val1);
4523 result->AddTable(*
this);
4525 result->Unique(ColNames);
4529 if (!Collisions.
IsKey(it.GetRowIdx())) {
4559 if (Collisions.
IsKey(it.GetRowIdx())) {
4584 if (!Collisions.
IsKey(it.GetRowIdx())) {
4594 for (
TInt c = 0; c < ProjectCols.
Len(); c++) {
4600 result->AddTable(*
this);
4610 TStr NColName = ColName;
4611 if (NColName.
GetCh(NColName.
Len()-2) ==
'-') {
4616 if (NColName ==
Sch[i].Val1.GetSubStr(0,
Sch[i].Val1.
Len()-3)) {
4621 NColName = NColName +
"-" + Conflicts.
GetStr();
4626 TStr DColName = ColName;
4627 if (DColName.
Len() == 0) {
return DColName; }
4628 if (DColName.
GetCh(0) ==
'_') {
return DColName; }
4629 if (DColName.
GetCh(DColName.
Len()-2) ==
'-') {
4634 if (DColName ==
Sch[i].Val1.GetSubStr(0,
Sch[i].Val1.
Len()-3)) {
4638 if (Conflicts > 1) {
return ColName; }
4639 else {
return DColName; }
4677 IntCols[LabelColIdx][i] = NegativeLabel;
4679 for (
TInt i = 0; i < SelectedRows.
Len(); i++) {
4680 IntCols[LabelColIdx][SelectedRows[i]] = PositiveLabel;
4690 TInt PartitionSize = Partitions[0].GetVal2()-Partitions[0].GetVal1()+1;
4691 #pragma omp parallel for schedule(dynamic, CHUNKS_PER_THREAD)
4692 for (
int i = 0; i < Partitions.
Len(); i++){
4696 if(ResType ==
atInt){
4721 #endif // USE_OPENMP
4737 if (Arg1Type ==
atStr || Arg2Type ==
atStr) {
4738 TExcept::Throw(
"Only numeric columns supported in arithmetic operations.");
4740 if(Arg1Type ==
atInt && Arg2Type ==
atFlt && ResAttr ==
""){
4741 TExcept::Throw(
"Trying to write float values to an existing int-typed column");
4748 TInt ColIdx3 = ColIdx1;
4750 if (ResAttr !=
"") {
4751 if (Arg1Type ==
atInt && Arg2Type ==
atInt) {
4761 ColGenericOpMP(ColIdx1, ColIdx2, Arg1Type, Arg2Type, ColIdx3, op);
4766 if(Arg1Type ==
atInt && Arg2Type ==
atInt){ printf(
"hooray!\n"); ResType =
atInt;}
4769 if(ResType ==
atInt){
4770 TInt V1 = RowI.GetIntAttr(ColIdx1);
4771 TInt V2 = RowI.GetIntAttr(ColIdx2);
4772 if (op ==
aoAdd) {
IntCols[ColIdx3][RowI.GetRowIdx()] = V1 + V2; }
4773 if (op ==
aoSub) {
IntCols[ColIdx3][RowI.GetRowIdx()] = V1 - V2; }
4774 if (op ==
aoMul) {
IntCols[ColIdx3][RowI.GetRowIdx()] = V1 * V2; }
4775 if (op ==
aoDiv) {
IntCols[ColIdx3][RowI.GetRowIdx()] = V1 / V2; }
4776 if (op ==
aoMod) {
IntCols[ColIdx3][RowI.GetRowIdx()] = V1 % V2; }
4777 if (op ==
aoMin) {
IntCols[ColIdx3][RowI.GetRowIdx()] = (V1 < V2) ? V1 : V2;}
4778 if (op ==
aoMax) {
IntCols[ColIdx3][RowI.GetRowIdx()] = (V1 > V2) ? V1 : V2;}
4780 TFlt V1 = (Arg1Type ==
atInt) ? (
TFlt)RowI.GetIntAttr(ColIdx1) : RowI.GetFltAttr(ColIdx1);
4781 TFlt V2 = (Arg2Type ==
atInt) ? (
TFlt)RowI.GetIntAttr(ColIdx2) : RowI.GetFltAttr(ColIdx2);
4782 if (op ==
aoAdd) {
FltCols[ColIdx3][RowI.GetRowIdx()] = V1 + V2; }
4783 if (op ==
aoSub) {
FltCols[ColIdx3][RowI.GetRowIdx()] = V1 - V2; }
4784 if (op ==
aoMul) {
FltCols[ColIdx3][RowI.GetRowIdx()] = V1 * V2; }
4785 if (op ==
aoDiv) {
FltCols[ColIdx3][RowI.GetRowIdx()] = V1 / V2; }
4787 if (op ==
aoMin) {
FltCols[ColIdx3][RowI.GetRowIdx()] = (V1 < V2) ? V1 : V2;}
4788 if (op ==
aoMax) {
FltCols[ColIdx3][RowI.GetRowIdx()] = (V1 > V2) ? V1 : V2;}
4836 TExcept::Throw(
"Only numeric columns supported in arithmetic operations.");
4838 if(Arg1Type ==
atInt && Arg2Type ==
atFlt && ResAttr ==
""){
4839 TExcept::Throw(
"Trying to write float values to an existing int-typed column");
4846 TInt ColIdx3 = AddToFirstTable ? ColIdx1 : ColIdx2;
4849 if (ResAttr !=
"") {
4850 if (AddToFirstTable) {
4851 if (Arg1Type ==
atInt && Arg2Type ==
atInt) {
4859 if (Arg1Type ==
atInt && Arg2Type ==
atInt) {
4879 RI2 = Table.
BegRI();
4882 while (RI1 <
EndRI() && RI2 < Table.
EndRI()) {
4883 if (ResType ==
atInt) {
4886 if (AddToFirstTable) {
4903 if (AddToFirstTable) {
4921 if (RI1 !=
EndRI() || RI2 != Table.
EndRI()) {
4927 const TStr& ResultAttrName,
TBool AddToFirstTable) {
4932 const TStr& ResultAttrName,
TBool AddToFirstTable) {
4937 const TStr& ResultAttrName,
TBool AddToFirstTable) {
4942 const TStr& ResultAttrName,
TBool AddToFirstTable) {
4947 const TStr& ResultAttrName,
TBool AddToFirstTable) {
4958 if (ArgType ==
atStr) {
4959 TExcept::Throw(
"Only numeric columns supported in arithmetic operations.");
4964 TInt ColIdx2 = ColIdx1;
4967 TBool shouldCast = floatCast;
4968 if (ResAttr !=
"") {
4969 if ((ArgType ==
atInt) & !shouldCast) {
4988 if ((ArgType ==
atInt) && !shouldCast) {
4989 TInt CurVal = RowI.GetIntAttr(ColIdx1);
4990 TInt Val =
static_cast<int>(Num);
4991 if (op ==
aoAdd) {
IntCols[ColIdx2][RowI.GetRowIdx()] = CurVal + Val; }
4992 if (op ==
aoSub) {
IntCols[ColIdx2][RowI.GetRowIdx()] = CurVal - Val; }
4993 if (op ==
aoMul) {
IntCols[ColIdx2][RowI.GetRowIdx()] = CurVal * Val; }
4994 if (op ==
aoDiv) {
IntCols[ColIdx2][RowI.GetRowIdx()] = CurVal / Val; }
4995 if (op ==
aoMod) {
IntCols[ColIdx2][RowI.GetRowIdx()] = CurVal % Val; }
4998 TFlt CurVal = (ArgType ==
atFlt) ? RowI.GetFltAttr(ColIdx1) : (
TFlt) RowI.GetIntAttr(ColIdx1);
4999 if (op ==
aoAdd) {
FltCols[ColIdx2][RowI.GetRowIdx()] = CurVal + Num; }
5000 if (op ==
aoSub) {
FltCols[ColIdx2][RowI.GetRowIdx()] = CurVal - Num; }
5001 if (op ==
aoMul) {
FltCols[ColIdx2][RowI.GetRowIdx()] = CurVal * Num; }
5002 if (op ==
aoDiv) {
FltCols[ColIdx2][RowI.GetRowIdx()] = CurVal / Num; }
5012 TInt PartitionSize = Partitions[0].GetVal2()-Partitions[0].GetVal1()+1;
5013 #pragma omp parallel for schedule(dynamic, CHUNKS_PER_THREAD)
5014 for (
int i = 0; i < Partitions.
Len(); i++){
5018 if ((ArgType ==
atInt) && !ShouldCast) {
5020 TInt Val =
static_cast<int>(Num);
5077 TInt ColIdx3 = ColIdx1;
5080 if (ResAttr !=
"") {
5086 TStr CurVal1 = RowI.GetStrAttr(ColIdx1);
5087 TStr CurVal2 = RowI.GetStrAttr(ColIdx2);
5088 TStr NewVal = CurVal1 + Sep + CurVal2;
5095 const TStr& ResAttr,
TBool AddToFirstTable) {
5116 TInt ColIdx3 = ColIdx1;
5118 if (!AddToFirstTable) {
5123 if (ResAttr !=
"") {
5124 if (AddToFirstTable) {
5137 RI2 = Table.
BegRI();
5139 while (RI1 <
EndRI() && RI2 < Table.
EndRI()) {
5142 TStr NewVal = CurVal1 + Sep + CurVal2;
5144 if (AddToFirstTable) {
5154 if (RI1 !=
EndRI() || RI2 != Table.
EndRI()) {
5173 TInt ColIdx2 = ColIdx1;
5176 if (ResAttr !=
"") {
5182 TStr CurVal = RowI.GetStrAttr(ColIdx1);
5183 TStr NewVal = CurVal + Sep + Val;
5194 Result.
Add(it.GetIntAttr(ColId));
5203 Result.
Add(it.GetFltAttr(ColId));
5212 Result.
Add(it.GetStrAttr(ColId));
5218 for (
TInt c = 0; c < NProjectCols.
Len(); c++) {
5225 if (ProjectColsSet.
IsKey(ColName) || ColName ==
IdColName) {
continue; }
5248 if (!ProjectColsSet.
IsKey(ColName) && ColName !=
IdColName) {
continue; }
5269 if (ProjectColsSet.
IsKey(ColName) || ColName ==
IdColName) {
continue; }
5283 if (K1 == K2) {
return V1 - V2; }
5284 else {
return K1 - K2; }
5289 for (j = Start; j < End; j++) {
5290 if (
CompareKeyVal(Key[j], Val[j], Key[j+1], Val[j+1]) > 0) {
5294 if (j >= End) {
return 0; }
5300 for (
TInt i = Start+1; i <= End; i++) {
5304 while ((Start < j) && (
CompareKeyVal(Key[j-1], Val[j-1], K, V) > 0)) {
5316 TInt L = End - Start + 1;
5320 if (
CompareKeyVal(Key[Idx1], Val[Idx1], Key[Idx2], Val[Idx2]) < 0) {
5321 if (
CompareKeyVal(Key[Idx2], Val[Idx2], Key[Idx3], Val[Idx3]) < 0) {
return Idx2; }
5322 if (
CompareKeyVal(Key[Idx1], Val[Idx1], Key[Idx3], Val[Idx3]) < 0) {
return Idx3; }
5325 if (
CompareKeyVal(Key[Idx3], Val[Idx3], Key[Idx2], Val[Idx2]) < 0) {
return Idx2; }
5326 if (
CompareKeyVal(Key[Idx3], Val[Idx3], Key[Idx1], Val[Idx1]) < 0) {
return Idx3; }
5335 TInt PivotKey = Key[Pivot];
5336 TInt PivotVal = Val[Pivot];
5337 Key.
Swap(Pivot, End);
5338 Val.
Swap(Pivot, End);
5339 TInt StoreIdx = Start;
5340 for (
TInt i = Start; i < End; i++) {
5342 if (
CompareKeyVal(Key[i], Val[i], PivotKey, PivotVal) <= 0) {
5343 Key.
Swap(i, StoreIdx);
5344 Val.
Swap(i, StoreIdx);
5350 Key.
Swap(StoreIdx, End);
5351 Val.
Swap(StoreIdx, End);
5358 if (L <= 0) {
return; }
5361 if (L <= 20) {
ISortKeyVal(Key, Val, Start, End); }
5365 if (Pivot > End) {
return; }
5372 #pragma omp task untied shared(Key, Val)
5379 #pragma omp task untied shared(Key, Val)
5391 if (ColIndex.
IsKey(Val)) {
5392 return ColIndex.
GetDat(Val);
5401 TInt ValAtRow = RowI.GetIntAttr(ColName);
5402 if ( Val == ValAtRow) {
5403 ToReturn.
Add(RowI.GetRowIdx());
5412 if (ColIndex.
IsKey(Map)) {
5413 return ColIndex.
GetDat(Map);
5422 TInt MapAtRow = RowI.GetStrMapByName(ColName);
5423 if ( Map == MapAtRow) {
5424 ToReturn.
Add(RowI.GetRowIdx());
5434 if (ColIndex.
IsKey(Val)) {
5435 return ColIndex.
GetDat(Val);
5445 TFlt ValAtRow = RowI.GetFltAttr(ColName);
5446 if ( Val == ValAtRow) {
5447 ToReturn.
Add(RowI.GetRowIdx());
5457 TInt ValAtRow = RowI.GetIntAttr(ColName);
5458 TInt RowIdx = RowI.GetRowIdx();
5459 if (NewIndex.
IsKey(ValAtRow)) {
5466 NewIndex.
AddDat(ValAtRow, New_V);
5476 TFlt ValAtRow = RowI.GetFltAttr(ColName);
5477 TInt RowIdx = RowI.GetRowIdx();
5478 if (NewIndex.
IsKey(ValAtRow)) {
5485 NewIndex.
AddDat(ValAtRow, New_V);
5494 TInt MapAtRow = RowI.GetStrMapByName(ColName);
5495 TInt RowIdx = RowI.GetRowIdx();
5496 if (NewIndex.
IsKey(MapAtRow)) {
5503 NewIndex.
AddDat(MapAtRow, New_V);
TSize GetMemUsedKB()
Returns approximate memory used by table in [KB].
void ThresholdJoinInputCorrectness(const TStr &KeyCol1, const TStr &JoinCol1, const TTable &Table, const TStr &KeyCol2, const TStr &JoinCol2)
void AddSchemaCol(const TStr &ColName, TAttrType ColType)
Adds column with name ColName and type ColType to the schema.
TFlt GetFltAttr(TInt ColIdx) const
Returns value of floating point attribute specified by float column index for current row...
TPair< TInt, TInt > TIntPr
TInt RequestIndexInt(const TStr &ColName)
Creates Index for Int Column ColName.
TBool IsLastGraphOfSequence()
Checks if the end of the graph sequence is reached.
TBool IsAttr(const TStr &Attr)
Checks if Attr is an attribute of this table schema.
void SetFltVal(TStr VarName, TFlt VarVal)
Set flt variable value in the predicate or all the children that use it.
void Order(const TStrV &OrderBy, TStr OrderColName="", TBool ResetRankByMSC=false, TBool Asc=true)
Orders the rows according to the values in columns of OrderBy (in descending lexicographic order)...
void FillBucketsByInterval(TStr SplitAttr, TIntPrV SplitIntervals)
Fills RowIdBuckets with sets of row ids.
bool Next()
Loads next line from the input file.
TIter EndI() const
Returns an iterator referring to the past-the-end element in the vector.
void RemoveRow(TInt RowIdx, TInt PrevRowIdx)
Removes row with id RowIdx.
TStrV EdgeAttrV
List of columns (attributes) to serve as edge attributes.
THash< GroupStmt, THash< TGroupKey, TIntV > > GroupMapping
Maps grouping statements to their (group-by key –> group id) mapping.
TInt FirstValidRow
Physical index of first valid row.
TStr DenormalizeColName(const TStr &ColName) const
Removes suffix from column name if it exists.
void GetDatV(TVec< TDat > &DatV) const
TInt GetPivot(TIntV &V, TInt StartIdx, TInt EndIdx, const TVec< TAttrType > &SortByTypes, const TIntV &SortByIndices, TBool Asc)
Gets pivot element for QSort.
TInt GetColIdx(const TStr &ColName) const
Gets index of column ColName among columns of the same type in the schema.
enum TAttrType_ TAttrType
Types for tables, sparse and dense attributes.
TVec< uint64 > GetStartPosV(uint64 Lb, uint64 Ub) const
Finds start positions of all lines ending somewhere in [Lb, Ub)
void StoreGroupCol(const TStr &GroupColName, const TVec< TPair< TInt, TInt > > &GroupAndRowIds)
Parallel helper function for grouping. - we currently don't support such parallel grouping by complex...
static const TInt Last
Special value for Next vector entry - last row in table.
PTable UnionAll(const TTable &Table)
Returns union of this table with given Table, preserving duplicates.
::TSize GetMemUsed() const
static TInt PartitionKeyVal(TIntV &Key, TIntV &Val, TInt Start, TInt End)
Primitive class: Wrapper around primitive data types.
bool operator==(const TRowIterator &RowI) const
Checks if this iterator points to the same row pointed by RowI.
TStrV GetSrcNodeIntAttrV() const
Gets src node int attribute name vector.
void PrintGrouping(const THash< TGroupKey, TIntV > &Grouping) const
void SelectFirstNRows(const TInt &N)
Selects first N rows from the table.
TStrV GetDstNodeStrAttrV() const
Gets dst node str attribute name vector.
void Del(const TSizeTy &ValN)
Removes the element at position ValN.
void GetPartitionRanges(TIntPrV &Partitions, TInt NumPartitions) const
Partitions the table into NumPartitions and populate Partitions with the ranges.
TInt GetIntAttr(TInt ColIdx) const
Returns value of integer attribute specified by integer column index for current row.
TPredComp
Comparison operators for selection predicates.
void Defrag()
Releases memory of deleted rows, and defrags.
PNEANet ToVarGraphSequenceIterator(TStr SplitAttr, TAttrAggr AggrPolicy, TIntPrV SplitIntervals)
Creates the graph sequence one at a time.
void SaveBin(const TStr &OutFNm)
Saves table schema and content to a binary file.
TStr GetStrAttr(TInt ColIdx) const
Returns value of string attribute specified by string column index for current row.
void Save(TSOut &SOut) const
void AddIntCol(const TStr &ColName)
Adds an integer column with name ColName.
THash< TStr, TPair< TAttrType, TInt > > ColTypeMap
TStr Rvar
Right variable of the comparison op.
void ThresholdJoinCountCollisions(const TTable &TB, const TTable &TS, const TIntIntVH &T, TInt JoinColIdxB, TInt KeyColIdxB, TInt KeyColIdxS, THash< TIntPr, TIntTr > &Counters, TBool ThisIsSmaller, TAttrType JoinColType, TAttrType KeyType)
void AddGraphAttributeV(TStrV &Attrs, TBool IsEdge, TBool IsSrc, TBool IsDst)
Adds vector of names of columns to be used as graph attributes.
void GroupByIntColMP(const TStr &GroupBy, THashMP< TInt, TIntV > &Grouping, TBool UsePhysicalIds=true) const
Groups/hashes by a single column with integer values, using OpenMP multi-threading.
void SetFltColToConstMP(TInt UpdateColIdx, TFlt DefaultFltVal)
int GetFlds() const
Returns the number of fields in the current line.
const TVal1 & GetVal1() const
void ThresholdJoinCountPerJoinKeyCollisions(const TTable &TB, const TTable &TS, const TIntIntVH &T, TInt JoinColIdxB, TInt KeyColIdxB, TInt KeyColIdxS, THash< TIntTr, TIntTr > &Counters, TBool ThisIsSmaller, TAttrType JoinColType, TAttrType KeyType)
uint64 GetStreamPos() const
Returns position of stream pointer.
void ColAdd(const TStr &Attr1, const TStr &Attr2, const TStr &ResultAttrName="")
Performs columnwise addition. See TTable::ColGenericOp.
TArithOp
Possible column-wise arithmetic operations.
TInt RequestIndexStrMap(const TStr &ColName)
Creates Index for Str Column ColName.
TFlt GetNextFltAttr(TInt ColIdx) const
Returns value of float attribute specified by float column index for next row.
TSizeTy Len() const
Returns the number of elements in the vector.
void AddSelectedRows(const TTable &Table, const TIntV &RowIDs)
Adds rows from Table that correspond to ids in RowIDs.
TStr IdColName
A mapping from column name to column type and column index among columns of the same type...
Predicate - encapsulates comparison operations.
TBool CompareAtomicConstTStr(TInt ColIdx, const TStr &Val, TPredComp Cmp)
Compares value in column ColIdx with given TStr Val.
PTable SelfSimJoinPerGroup(const TStr &GroupAttr, const TStr &SimCol, const TStr &DistanceColName, const TSimType &SimType, const TFlt &Threshold)
Performs join if the distance between two rows is less than the specified threshold.
static TStrV NormalizeColNameV(const TStrV &Cols)
Adds suffix to column name if it doesn't exist.
static TInt CompareKeyVal(const TInt &K1, const TInt &V1, const TInt &K2, const TInt &V2)
THash< TStr, THash< TInt, TIntV > > StrMapColIndexes
Indexes for String Columns.
THash< TStr, THash< TInt, TIntV > > IntColIndexes
Indexes for Int Columns.
void ColConcat(const TStr &Attr1, const TStr &Attr2, const TStr &Sep="", const TStr &ResAttr="")
Concatenates two string columns.
void Save(TSOut &SOut) const
TStrV GetSrcNodeStrAttrV() const
Gets src node str attribute name vector.
TTableContext * Context
Execution Context.
TSimType
Distance metrics for similarity joins.
TBool Start
A flag indicating whether the current row is the first valid row of the table.
void QSort(TIntV &V, TInt StartIdx, TInt EndIdx, const TVec< TAttrType > &SortByTypes, const TIntV &SortByIndices, TBool Asc=true)
Performs QSort on given vector V.
TAttrType Type
Type of the predicate variables.
TPredicateNode * Left
Left child of this node.
THash< TStr, TInt > IntVars
Int variables in the current predicate tree.
void InvalidateAffectedGroupings(const TStr &Attr)
void Dump(FILE *OutF=stdout) const
Prints table contents to a text file.
TInt LastValidRow
Physical index of last valid row.
void Group(const TStrV &GroupBy, const TStr &GroupColName, TBool Ordered=true, TBool UsePhysicalIds=true)
Groups rows depending on values of GroupBy columns.
TStr GetSubStr(const int &BChN, const int &EChN) const
void ResizeTable(int RowCount)
Resizes the table to hold RowCount rows.
TAttrAggr
Possible policies for aggregating node attributes.
void ColDiv(const TStr &Attr1, const TStr &Attr2, const TStr &ResultAttrName="")
Performs columnwise division. See TTable::ColGenericOp.
void Rename(const TStr &Column, const TStr &NewLabel)
Renames a column.
void GroupAux(const TStrV &GroupBy, THash< TGroupKey, TPair< TInt, TIntV > > &Grouping, TBool Ordered, const TStr &GroupColName, TBool KeepUnique, TIntV &UniqueVec, TBool UsePhysicalIds=true)
Helper function for grouping.
const TVal2 & GetVal2() const
TStrV GetEdgeFltAttrV() const
Gets edge float attribute name vector.
bool GetInt(const int &FldN, int &Val) const
If the field FldN is an integer its value is returned in Val and the function returns true...
TStr GetNextStrAttr(TInt ColIdx) const
Returns value of string attribute specified by string column index for next row.
const TDat & GetDat(const TKey &Key) const
Node iterator. Only forward iteration (operator++) is supported.
void GetStrAttrNames(TStrV &Names) const
Gets vector of str attribute names.
Schema GetSchema()
Gets the schema of this table.
TVec< TIntV > RowIdBuckets
Partitioning of row ids into buckets corresponding to different graph objects when generating a seque...
TRowIteratorWithRemove BegRIWR()
Gets iterator with remove to the first valid row.
TInt GetNumValidRows() const
Gets number of valid, i.e. not deleted, rows in this table.
TRowIterator BegRI() const
Gets iterator to the first valid row of the table.
int GetFlds() const
Returns the number of fields in the current line.
PNEANet ToGraphPerGroupIterator(TStr GroupAttr, TAttrAggr AggrPolicy)
Creates the graph sequence one at a time.
TVec< TIntV > IntCols
Next[i] is the successor of row i. Table iterators follow the order dictated by Next ...
Iterator class for TTable rows, that allows logical row removal while iterating.
TSizeTy GetMemUsed() const
Returns the memory footprint (the number of bytes) of the vector.
void CheckAndAddIntNode(PNEANet Graph, THashSet< TInt > &NodeVals, TInt NodeId)
Checks if given NodeId is seen earlier; if not, add it to Graph and hashmap NodeVals.
TVec< PNEANet > ToGraphSequence(TStr SplitAttr, TAttrAggr AggrPolicy, TInt WindowSize, TInt JumpSize, TInt StartVal=TInt::Mn, TInt EndVal=TInt::Mx)
Creates a sequence of graphs based on values of column SplitAttr and windows specified by JumpSize an...
void GroupByFltCol(const TStr &GroupBy, T &Grouping, const TIntV &IndexSet, TBool All, TBool UsePhysicalIds=true) const
Groups/hashes by a single column with float values. Returns hash table with grouping.
TInt GetStrMapByName(const TStr &Col) const
Returns integer mapping of string attribute specified by attribute name for current row...
PTable Minus(TTable &Table)
Returns table with rows that are present in this table but not in given Table.
bool IsKey(const TKey &Key) const
static PTable GetNodeTable(const PNEANet &Network, TTableContext *Context)
Extracts node TTable from PNEANet.
THash< TStr, TStr > StrVars
String variables in the current predicate tree.
TIntV GetStrRowIdxByMap(const TStr &ColName, const TInt &Map) const
Gets the rows containing int mapping Map in str column ColName.
int GetId() const
Returns edge ID.
TStr GetIdColName() const
Gets name of the id column of this table.
static TBool EvalStrAtom(const TStr &Val1, const TStr &Val2, TPredComp Cmp)
Compare atomic string values Val1 and Val2 using predicate Cmp.
TRowIteratorWithRemove()
Default constructor.
static void LoadSSSeq(PTable &NewTable, const Schema &S, const TStr &InFNm, const TIntV &RelevantCols, const char &Separator, TBool HasTitleLine)
Sequentially loads data from input file at InFNm into NewTable.
void Save(TSOut &SOut) const
void IncrementNext()
Increments the next vector and sets last, NumRows and NumValidRows.
PTable SimJoin(const TStrV &Cols1, const TTable &Table, const TStrV &Cols2, const TStr &DistanceColName, const TSimType &SimType, const TFlt &Threshold)
Performs join if the distance between two rows is less than the specified threshold.
bool Empty() const
Tests whether the vector is empty.
void InitIds()
Adds explicit row ids, initialize hash set mapping ids to physical rows.
TStrTrV CommonNodeAttrs
List of attribute pairs with values common to source and destination and their common given name...
void QSortPar(TIntV &V, const TVec< TAttrType > &SortByTypes, const TIntV &SortByIndices, TBool Asc=true)
Performs QSort in parallel on given vector V.
void Save(TSOut &SOut)
Saves table schema and content to a binary format.
int GetDstNId() const
Returns the destination of the edge.
void Swap(TVec< TVal, TSizeTy > &Vec)
Swaps the contents of the vector with Vec.
TBool Result
Result of evaluating the predicate rooted at this node.
void ReadFltCol(const TStr &ColName, TFltV &Result) const
Reads values of entire float column into Result.
void InvalidatePhysicalGroupings()
void SkipCommentLines()
Skips lines that begin with a comment character.
TPair< TIntV, TFltV > TGroupKey
Represents grouping key with IntV for integer and string attributes and FltV for float attributes...
Iterator class for TTable rows.
TInt GetNextRowIdx() const
Gets physical index of next row.
int GetId() const
Returns ID of the current node.
bool Eof() const
Checks for end of file.
void Aggregate(const TStrV &GroupByAttrs, TAttrAggr AggOp, const TStr &ValAttr, const TStr &ResAttr, TBool Ordered=true)
Aggregates values of ValAttr after grouping with respect to GroupByAttrs. Results are stored as new at...
TAttrType GetSchemaColType(TInt Idx) const
Gets type of the column with index Idx in the schema.
void Clr(const bool &DoDel=true, const TSizeTy &NoDelLim=-1)
Clears the contents of the vector.
void SetIntVal(TStr VarName, TInt VarVal)
Set int variable value in the predicate or all the children that use it.
TStrV GetEdgeIntAttrV() const
Gets edge int attribute name vector.
void SetStrVal(TStr VarName, TStr VarVal)
Set str variable value in the predicate or all the children that use it.
void RemoveNext()
Removes next row.
TStr StrConst
Str const value if this object is a string constant.
TVec< PNEANet > ToGraphPerGroup(TStr GroupAttr, TAttrAggr AggrPolicy)
Creates a sequence of graphs based on grouping specified by GroupAttr.
const TTable * Table
Reference to table containing this row.
static void Throw(const TStr &MsgStr)
Schema DenormalizeSchema() const
Removes suffix from column names in the Schema.
PNEANet NextGraphIterator()
Calls to this must be preceded by a call to one of the above ToGraph*Iterator functions.
void PutAll(const TVal &Val)
Sets all elements of the vector to value Val.
unsigned long long uint64
PNEANet BuildGraph(const TIntV &RowIds, TAttrAggr AggrPolicy)
Makes a single pass over the rows in the given row id set, and creates nodes, edges, assigns node and edge attributes.
TBool EvalAtomicPredicate(const TAtomicPredicate &Atom)
Evaluates the given atomic predicate.
void ColSub(const TStr &Attr1, const TStr &Attr2, const TStr &ResultAttrName="")
Performs columnwise subtraction. See TTable::ColGenericOp.
int GetSrcNId() const
Gets the source node of an edge.
const TVal & GetDat(const TVal &Val) const
Returns reference to the first occurrence of element Val.
int GetEmptyRowsStart(int NewRows)
Gets the start index to a chunk of empty rows of size NewRows.
THash< TStr, THash< TFlt, TIntV > > FltColIndexes
Indexes for Float Columns.
TStr Lvar
Left variable of the comparison op.
const char * GetKey(const int &KeyId) const
void ProjectInPlace(const TStrV &ProjectCols)
Keeps only the columns specified in ProjectCols.
TBool CompareAtomicConst(TInt ColIdx, const TPrimitive &Val, TPredComp Cmp)
Compares value in column ColIdx with given primitive Val.
void Reindex()
Reinitializes row ids.
TInt CurrBucket
Current row id bucket - used when generating a sequence of graphs using an iterator.
PTable IsNextK(const TStr &OrderCol, TInt K, const TStr &GroupBy, const TStr &RankColName="")
Distance based filter.
TAttrType GetColType(const TStr &ColName) const
Gets type of column ColName.
TVec< TIntV > StrColMaps
Data columns of integer mappings of string attributes.
int sync_bool_compare_and_swap(int *lock)
TRowIteratorWithRemove & Next()
Increments the iterator (For Python compatibility).
PNEANet ToGraphSequenceIterator(TStr SplitAttr, TAttrAggr AggrPolicy, TInt WindowSize, TInt JumpSize, TInt StartVal=TInt::Mn, TInt EndVal=TInt::Mx)
Creates the graph sequence one at a time.
int GetDstNId() const
Gets destination node of an edge.
int AddKey(const TKey &Key)
::TSize GetMemUsed() const
void GroupByIntCol(const TStr &GroupBy, T &Grouping, const TIntV &IndexSet, TBool All, TBool UsePhysicalIds=true) const
Groups/hashes by a single column with integer values.
PTable Join(const TStr &Col1, const TTable &Table, const TStr &Col2)
Performs equijoin.
bool IsKey(const TKey &Key) const
static void LoadSSPar(PTable &NewTable, const Schema &S, const TStr &InFNm, const TIntV &RelevantCols, const char &Separator, TBool HasTitleLine)
Loads data in parallel from input file at InFNm into NewTable. Only works when NewTable has no string c...
TIntV GetIntRowIdxByVal(const TStr &ColName, const TInt &Val) const
Gets the rows containing Val in int column ColName.
TInt GetRowIdx() const
Gets the id of the row pointed by this iterator.
bool GetFlt(const int &FldN, double &Val) const
If the field FldN is a float its value is returned in Val and the function returns true...
A class representing a cached grouping statement identifier.
TStr GetSchemaColName(TInt Idx) const
Gets name of the column with index Idx in the schema.
int GetSrcNId() const
Returns the source of the edge.
TInt GetStrMapById(TInt ColIdx) const
Returns integer mapping of a string attribute value specified by string column index for current row...
TStrV SrcNodeAttrV
List of columns (attributes) to serve as source node attributes.
TAttrAggr AggrPolicy
Aggregation policy used for solving conflicts between different values of an attribute of the same no...
static void QSortKeyVal(TIntV &Key, TIntV &Val, TInt Start, TInt End)
void Select(TPredicate &Predicate, TIntV &SelectedRows, TBool Remove=true)
Selects rows that satisfy given Predicate.
void UnionAllInPlace(const TTable &Table)
Same as TTable::ConcatTable.
char GetCh(const int &ChN) const
TIntIntH RowIdMap
Mapping of permanent row ids to physical id.
void SaveSS(const TStr &OutFNm)
Saves table schema and content to a TSV file.
PTable Union(const TTable &Table)
Returns union of this table with given Table.
void SelectAtomicConst(const TStr &Col, const TPrimitive &Val, TPredComp Cmp, TIntV &SelectedRows, PTable &SelectedTable, TBool Remove=true, TBool Table=true)
Selects rows where the value of Col matches given primitive Val.
void UpdateFltFromTable(const TStr &KeyAttr, const TStr &UpdateAttr, const TTable &Table, const TStr &FKeyAttr, const TStr &ReadAttr, TFlt DefaultFltVal=0.0)
Edge iterator. Only forward iteration (operator++) is supported.
void ColConcatConst(const TStr &Attr1, const TStr &Val, const TStr &Sep="", const TStr &ResAttr="")
Concatenates column values with given string value.
void GetCollidingRows(const TTable &T, THashSet< TInt > &Collisions)
Gets set of row ids of rows common with table T.
void AddGraphAttribute(const TStr &Attr, TBool IsEdge, TBool IsSrc, TBool IsDst)
Adds names of columns to be used as graph attributes.
void KeepSortedRows(const TIntV &KeepV)
Removes all rows that are not mentioned in the SORTED vector KeepV.
TPair< TAttrType, TInt > GetColTypeMap(const TStr &ColName) const
Gets column type and index of ColName.
TAttrType GetType() const
THash< TInt, TInt > TIntH
void GroupingSanityCheck(const TStr &GroupBy, const TAttrType &AttrType) const
Checks if grouping key exists and matches given attr type.
void GetFltAttrNames(TStrV &Names) const
Gets vector of flt attribute names.
TStrHash< TInt, TBigStrPool > StringVals
StringPool - stores string data values and maps them to integers.
void UpdateTableForNewRow()
Updates table state after adding one or more rows.
void SetVal(const TSizeTy &ValN, const TVal &Val)
Sets the value of element at position ValN to Val.
int AddKey(const char *Key)
static TInt UseMP
Global switch for choosing multi-threaded versions of TTable functions.
TPredComp Compare
Comparison op represented by this node.
void DelColType(const TStr &ColName)
Removes the column with name ColName from the ColTypeMap.
void ReadIntCol(const TStr &ColName, TIntV &Result) const
Reads values of entire int column into Result.
void FillBucketsByWindow(TStr SplitAttr, TInt JumpSize, TInt WindowSize, TInt StartVal, TInt EndVal)
Fills RowIdBuckets with sets of row ids.
static TStr NormalizeColName(const TStr &ColName)
Adds suffix to column name if it doesn't exist.
void AddStrCol(const TStr &ColName)
Adds a string column with name ColName.
THash< TStr, GroupStmt > GroupStmtNames
Maps user-given grouping statement names to their group-by attributes.
TRowIterator & Next()
Increments the iterator (For Python compatibility).
TStr SrcCol
Column (attribute) to serve as src nodes when constructing the graph.
void GetIntAttrNames(TStrV &Names) const
Gets vector of int attribute names.
void ISort(const TSizeTy &MnLValN, const TSizeTy &MxRValN, const bool &Asc)
Insertion sorts the values between positions MnLValN...MxRValN.
PTable Project(const TStrV &ProjectCols)
Returns table with only the columns in ProjectCols.
void StoreStrCol(const TStr &ColName, const TStrV &ColVals)
Adds entire str column to table.
TVec< TFltV > FltCols
Data columns of floating point attributes.
TStrV GetDstNodeFltAttrV() const
Gets dst node float attribute name vector.
TStrV DstNodeAttrV
List of columns (attributes) to serve as destination node attributes.
uint64 CountNewLinesInRange(uint64 Lb, uint64 Ub) const
Counts number of occurrences of '\n' in [Lb, Ub)
Edge iterator. Only forward iteration (operator++) is supported.
TIntV Next
A vector describing the logical order of the rows.
static int GetRnd(const int &Range=0)
void Gen(const int &ExpectVals)
int AddKey(const TKey &Key)
TRowIterator EndRI() const
Gets iterator to the last valid row of the table.
void AddStrVal(const TInt &ColIdx, const TStr &Val)
Adds Val in column with id ColIdx.
TTable * Table
Reference to table containing this row.
int GetIntFromFldV(TVec< char * > &FieldsV, const int &FldN)
Gets integer at field FldN.
void AddRow(const TRowIterator &RI)
Adds row corresponding to RI.
void NextFromIndex(uint64 Index, TVec< char * > &FieldsV)
Loads next line starting from a given position.
TInt NumRows
Number of rows in the table (valid and invalid).
TFlt GetFltVal(const TStr &ColName, const TInt &RowIdx)
Gets the value of float attribute ColName at row RowIdx.
static PTable LoadSS(const Schema &S, const TStr &InFNm, TTableContext *Context, const char &Separator= '\t', TBool HasTitleLine=false)
Loads table from spreadsheet (TSV, CSV, etc). Note: HasTitleLine = true is not supported. Please comment title lines instead.
void Unique(const TStr &Col)
Removes rows with duplicate values in given column.
TRowIteratorWithRemove & operator++(int)
Increments the iterator.
void AddJointRow(const TTable &T1, const TTable &T2, TInt RowIdx1, TInt RowIdx2)
Adds joint row T1[RowIdx1]<=>T2[RowIdx2].
void Classify(TPredicate &Predicate, const TStr &LabelName, const TInt &PositiveLabel=1, const TInt &NegativeLabel=0)
void Merge(TIntV &V, TInt Idx1, TInt Idx2, TInt Idx3, const TVec< TAttrType > &SortByTypes, const TIntV &SortByIndices, TBool Asc=true)
Helper function for parallel QSort.
TStr DstCol
Column (attribute) to serve as dst nodes when constructing the graph.
void ReadStrCol(const TStr &ColName, TStrV &Result) const
Reads values of entire string column into Result.
TStr GetStrVal(TInt ColIdx, TInt RowIdx) const
Gets the value in column with id ColIdx at row RowIdx.
void GetKeyV(TVec< TKey > &KeyV) const
static PTable GetEdgeTable(const PNEANet &Network, TTableContext *Context)
Extracts edge TTable from PNEANet.
static const TInt Invalid
Special value for Next vector entry - logically removed row.
void AddColType(const TStr &ColName, TPair< TAttrType, TInt > ColType)
Adds column with name ColName and type ColType to the ColTypeMap.
PNEANet GetNextGraphFromSequence()
Returns the next graph in sequence corresponding to RowIdBuckets.
TBool CompareAtomicConst(TInt ColIdx, const TPrimitive &Val, TPredComp Cmp)
Compares value in column ColIdx with given primitive Val.
void StoreFltCol(const TStr &ColName, const TFltV &ColVals)
Adds entire flt column to table.
THash< GroupStmt, THash< TInt, TGroupKey > > GroupIDMapping
Maps grouping statements to their (group id –> group-by key) mapping.
TInt IntConst
Int const value if this object is an integer constant.
TIter BegI() const
Returns an iterator pointing to the first element in the vector.
TPredOp Op
Logical op represented by this node.
void GroupByStrCol(const TStr &GroupBy, T &Grouping, const TIntV &IndexSet, TBool All, TBool UsePhysicalIds=true) const
Groups/hashes by a single column with string values. Returns hash table with grouping.
TTableContext * ChangeContext(TTableContext *Context)
Changes the current context. Moves all object items to the new context.
TInt CurrRowIdx
Physical row index of current row pointed by iterator.
TPredicateNode * Root
Root node of the current predicate tree.
void AggregateCols(const TStrV &AggrAttrs, TAttrAggr AggOp, const TStr &ResAttr)
Aggregates attributes in AggrAttrs across columns.
bool operator==(const TRowIteratorWithRemove &RowI) const
Checks if this iterator points to the same row pointed by RowI.
Table class: Relational table with columnar data storage.
bool operator<(const TRowIterator &RowI) const
Checks if this iterator points to a row that is before the one pointed by RowI.
void SetStreamPos(uint64 Pos)
Sets position of stream pointer.
void UpdateFltFromTableMP(const TStr &KeyAttr, const TStr &UpdateAttr, const TTable &Table, const TStr &FKeyAttr, const TStr &ReadAttr, TFlt DefaultFltVal=0.0)
static PTable GetEdgeTablePN(const PNGraphMP &Network, TTableContext *Context)
Extracts edge TTable from parallel graph PNGraphMP.
void ISort(TIntV &V, TInt StartIdx, TInt EndIdx, const TVec< TAttrType > &SortByTypes, const TIntV &SortByIndices, TBool Asc=true)
Performs insertion sort on given vector V.
TInt GetRowIdx() const
Gets physical index of current row.
TInt RequestIndexFlt(const TStr &ColName)
Creates Index for Flt Column ColName.
static TBool EvalAtom(T Val1, T Val2, TPredComp Cmp)
Compare atomic values Val1 and Val2 using predicate Cmp.
bool operator<(const TRowIteratorWithRemove &RowI) const
Checks if this iterator points to a row that is before the one pointed by RowI.
void InitRowIdBuckets(int NumBuckets)
Initializes the RowIdBuckets vector which will be used for the graph sequence creation.
TStrV GetSrcNodeFltAttrV() const
Gets src node float attribute name vector.
static PTable GetFltNodePropertyTable(const PNEANet &Network, const TIntFltH &Property, const TStr &NodeAttrName, const TAttrType &NodeAttrType, const TStr &PropertyAttrName, TTableContext *Context)
Extracts node and edge property TTables from THash.
Hash-Table with multiprocessing support.
PTable ThresholdJoinPerJoinKeyOutputTable(const THash< TIntTr, TIntTr > &Counters, TInt Threshold, const TTable &Table)
PTable ThresholdJoin(const TStr &KeyCol1, const TStr &JoinCol1, const TTable &Table, const TStr &KeyCol2, const TStr &JoinCol2, TInt Threshold, TBool PerJoinKey=false)
static void ISortKeyVal(TIntV &Key, TIntV &Val, TInt Start, TInt End)
TBool IsConst
Flag if this atomic node represents a constant value.
TInt CurrRowIdx
Physical row index of current row pointed by iterator.
static TInt GetPivotKeyVal(TIntV &Key, TIntV &Val, TInt Start, TInt End)
void Clr(const bool &DoDel=true, const int &NoDelLim=-1, const bool &ResetDat=true)
bool Next()
Loads next line from the input file.
TInt IsNextDirty
Flag to signify whether the rows are stored in logical sequence or reordered. Used for optimizing Get...
TStrV GetEdgeStrAttrV() const
Gets edge str attribute name vector.
void AddFltCol(const TStr &ColName)
Adds a float column with name ColName.
TInt CompareRows(TInt R1, TInt R2, const TAttrType &CompareByType, const TInt &CompareByIndex, TBool Asc=true)
Returns positive value if R1 is bigger, negative value if R2 is bigger, and 0 if they are equal (strc...
TStr RenumberColName(const TStr &ColName) const
Returns a re-numbered column name based on number of existing columns with conflicting names...
TTriple< TInt, TInt, TInt > TIntTr
TInt NumValidRows
Number of valid rows in the table (i.e. rows that were not logically removed).
void Gen(const TSizeTy &_Vals)
Constructs a vector (an array) of _Vals elements.
PTable ThresholdJoinOutputTable(const THash< TIntPr, TIntTr > &Counters, TInt Threshold, const TTable &Table)
void Count(const TStr &CountColName, const TStr &Col)
Counts number of unique elements.
PTable InitializeJointTable(const TTable &Table)
Initializes an empty table for the join of this table with the given table.
void ColMax(const TStr &Attr1, const TStr &Attr2, const TStr &ResultAttrName="")
Performs max of two columns. See TTable::ColGenericOp.
void Reserve(const TSizeTy &_MxVals)
Reserves enough memory for the vector to store _MxVals elements.
void ClassifyAtomic(const TStr &Col1, const TStr &Col2, TPredComp Cmp, const TStr &LabelName, const TInt &PositiveLabel=1, const TInt &NegativeLabel=0)
bool Cmp(const int &RelOp, const TRec &Rec1, const TRec &Rec2)
void StoreIntCol(const TStr &ColName, const TIntV &ColVals)
Adds entire int column to table.
void AddIdColumn(const TStr &IdColName)
Adds a column of explicit integer identifiers to the rows.
void GetVariables(TStrV &Variables)
Get variables in the predicate tree rooted at this node.
static TInt CheckSortedKeyVal(TIntV &Key, TIntV &Val, TInt Start, TInt End)
void AddEdgeAttributes(PNEANet &Graph, int RowId)
Adds attributes of edge corresponding to RowId to the Graph.
void GetIntAttrNames(TStrV &Names) const
Gets vector of int attribute names.
TVec< PNEANet > ToVarGraphSequence(TStr SplitAttr, TAttrAggr AggrPolicy, TIntPrV SplitIntervals)
Creates a sequence of graphs based on values of column SplitAttr and intervals specified by SplitInte...
TInt GetNextIntAttr(TInt ColIdx) const
Returns value of integer attribute specified by integer column index for next row.
void ColGenericOp(const TStr &Attr1, const TStr &Attr2, const TStr &ResAttr, TArithOp op)
Performs columnwise arithmetic operation.
void SelectAtomic(const TStr &Col1, const TStr &Col2, TPredComp Cmp, TIntV &SelectedRows, TBool Remove=true)
Selects rows using atomic compare operation.
TRowIterator & operator++(int)
Increments the iterator.
bool IsKey(const TKey &Key) const
void GetVariables(TStrV &Variables)
Get variables in current predicate.
bool IsInt(const int &FldN) const
Checks whether field FldN is an integer.
TSizeTy Add()
Adds a new element at the end of the vector, after its current last element.
void ColMin(const TStr &Attr1, const TStr &Attr2, const TStr &ResultAttrName="")
Performs min of two columns. See TTable::ColGenericOp.
void ColMod(const TStr &Attr1, const TStr &Attr2, const TStr &ResultAttrName="")
Performs columnwise modulus. See TTable::ColGenericOp.
static PNEANet New()
Static constructor returns pointer to graph. Ex: PNEANet Graph=TNEANet::New().
void GetFltAttrNames(TStrV &Names) const
Gets vector of flt attribute names.
void RemoveFirstRow()
Removes first valid row of the table.
bool IsStrIn(const TStr &Str) const
TBool IsFirst() const
Checks whether iterator points to first valid row of the table.
void Trunc(const TSizeTy &_Vals=-1)
Truncates the vector's length and capacity to _Vals elements.
Atomic predicate - encapsulates comparison operations.
TBool IsColName(const TStr &ColName) const
TInt CheckAndAddFltNode(T Graph, THash< TFlt, TInt > &NodeVals, TFlt FNodeVal)
Checks if given NodeVal is seen earlier; if not, add it to Graph and hashmap NodeVals.
Predicate node - represents a binary predicate operation on two predicate nodes.
void AddNodeAttributes(TInt NId, TStrV NodeAttrV, TInt RowId, THash< TInt, TStrIntVH > &NodeIntAttrs, THash< TInt, TStrFltVH > &NodeFltAttrs, THash< TInt, TStrStrVH > &NodeStrAttrs)
Takes as parameters, and updates, maps NodeXAttrs: Node Id –> (attribute name –> Vector of attribut...
void GetStrAttrNames(TStrV &Names) const
Gets vector of str attribute names.
PNEANet GetFirstGraphFromSequence(TAttrAggr AggrPolicy)
Returns the first graph of the sequence.
TDat & AddDat(const TKey &Key)
PTable Intersection(const TTable &Table)
Returns intersection of this table with given Table.
void AddNJointRowsMP(const TTable &T1, const TTable &T2, const TVec< TIntPrV > &JointRowIDSet)
Adds rows from T1 and T2 to this table in a parallel manner. Used by Join.
const TDat & GetDat(const TKey &Key) const
TFlt FltConst
Flt const value if this object is a float constant.
TBool Eval()
Return the result of evaluating current predicate.
TIntV GetFltRowIdxByVal(const TStr &ColName, const TFlt &Val) const
Gets the rows containing Val in flt column ColName.
TSize GetContextMemUsedKB()
Returns approximate memory used by table context in [KB].
uint64 GetStreamLen() const
Returns length of stream.
TPredicateNode * Parent
Parent node of this node.
const TKey & GetKey(const int &KeyId) const
TInt GetIntVal(const TStr &ColName, const TInt &RowIdx)
Gets the value of integer attribute ColName at row RowIdx.
void AddTable(const TTable &T)
Adds all the rows of the input table. Allows duplicate rows (not a union).
bool IsCmt() const
Checks whether the current line is a comment (starts with '#').
void ColMul(const TStr &Attr1, const TStr &Attr2, const TStr &ResultAttrName="")
Performs columnwise multiplication. See TTable::ColGenericOp.
void ClassifyAux(const TIntV &SelectedRows, const TStr &LabelName, const TInt &PositiveLabel=1, const TInt &NegativeLabel=0)
Adds a label attribute with positive labels on selected rows and negative labels on the rest...
THash< TStr, TFlt > FltVars
Float variables in the current predicate tree.
void AddNRows(int NewRows, const TVec< TIntV > &IntColsP, const TVec< TFltV > &FltColsP, const TVec< TIntV > &StrColMapsP)
Adds NewRows rows from the given vectors for each column type.
TVec< PTable > SpliceByGroup(const TStrV &GroupByAttrs, TBool Ordered=true)
Splices table into subtables according to a grouping statement.
int GetKeyId(const char *Key) const
void ColGenericOpMP(TInt ArgColIdx1, TInt ArgColIdx2, TAttrType ArgType1, TAttrType ArgType2, TInt ResColIdx, TArithOp op)
TVec< PNEANet > GetGraphsFromSequence(TAttrAggr AggrPolicy)
Returns a sequence of graphs.
TStrV GetDstNodeIntAttrV() const
Gets dst node int attribute name vector.
TAtomicPredicate Atom
Atomic predicate at this node.
bool IsFlt(const int &FldN) const
Checks whether field FldN is a float.
TSizeTy AddV(const TVec< TVal, TSizeTy > &ValV)
Adds the elements of the vector ValV to the end of the vector.
TInt Partition(TIntV &V, TInt StartIdx, TInt EndIdx, const TVec< TAttrType > &SortByTypes, const TIntV &SortByIndices, TBool Asc)
Partitions vector for QSort.
double GetFltFromFldV(TVec< char * > &FieldsV, const int &FldN)
Gets float at field FldN.