SNAP Library 2.2, Developer Reference
2014-03-11 19:15:55
SNAP, a general purpose, high performance system for analysis and manipulation of large networks
|
#include <zipfl.h>
Public Member Functions | |
TZipIn (const TStr &FNm) | |
TZipIn (const TStr &FNm, bool &OpenedP) | |
~TZipIn () | |
bool | Eof () |
int | Len () const |
char | GetCh () |
char | PeekCh () |
int | GetBf (const void *LBf, const TSize &LBfL) |
bool | GetNextLnBf (TChA &LnChA) |
uint64 | GetFLen () const |
uint64 | GetCurFPos () const |
Static Public Member Functions | |
static PSIn | New (const TStr &FNm) |
static PSIn | New (const TStr &FNm, bool &OpenedP) |
static bool | IsZipFNm (const TStr &FNm) |
Check whether the file extension of FNm is that of a compressed file (.gz, .7z, .rar, .zip, .cab, .arj, .bzip2). | |
static bool | IsZipExt (const TStr &FNmExt) |
Check whether the file extension FNmExt is that of a compressed file (.gz, .7z, .rar, .zip, .cab, .arj, .bzip2). | |
static TStr | GetCmd (const TStr &ZipFNm) |
Return a command-line string that is executed in order to decompress a file to standard output. | |
static uint64 | GetFLen (const TStr &ZipFNm) |
Return the uncompressed size (in bytes) of the compressed file ZipFNm. | |
static PSIn | NewIfZip (const TStr &FNm) |
Static Public Attributes | |
static TStr | SevenZipPath = "/usr/bin" |
Private Member Functions | |
void | FillBf () |
int | FindEol (int &BfN) |
void | CreateZipProcess (const TStr &Cmd, const TStr &ZipFNm) |
TZipIn () | |
TZipIn (const TZipIn &) | |
TZipIn & | operator= (const TZipIn &) |
Static Private Member Functions | |
static void | FillFExtToCmdH () |
Private Attributes | |
FILE * | ZipStdoutRd |
FILE * | ZipStdoutWr |
uint64 | FLen |
uint64 | CurFPos |
char * | Bf |
int | BfC |
int | BfL |
Static Private Attributes | |
static TStrStrH | FExtToCmdH |
static const int | MxBfL = 32*1024 |
Compressed File Input Stream. The class reads from a compressed file without explicitly uncompressing it. This is achieved by running the external 7ZIP program, which uncompresses to standard output, which is then piped to TZipFl. The class requires 7ZIP to be installed on the machine. Go to http://www.7-zip.org to install the software. 7z (7z.exe) is an executable and can decompress the following formats: .gz, .7z, .rar, .zip, .cab, .arj, .bzip2. The class TZipIn expects that '7z' ('7z.exe') is in the working path. Make sure you can execute '7z e -y -bd -so <FILENAME>'. For 7z to work properly you need both the 7z executable and the directory 'Codecs'. Use TZipIn::SevenZipPath to set the path to the 7z executable.
NOTE: Current implementation of TZipIn supports only .zip format, other compression formats are not supported.
TZipIn::TZipIn | ( | ) | [private] |
TZipIn::TZipIn | ( | const TZipIn & | ) | [private] |
TZipIn::TZipIn | ( | const TStr & | FNm | ) |
Definition at line 68 of file zipfl.cpp.
References Bf, BfC, BfL, CreateZipProcess(), TStr::CStr(), EAssertR, EFailR, TStr::Empty(), TFile::Exists(), FillBf(), FLen, TStr::Fmt(), GetCmd(), TStr::GetFExt(), GetFLen(), MxBfL, ZipStdoutRd, and ZipStdoutWr.
: TSBase(FNm.CStr()), TSIn(FNm), ZipStdoutRd(NULL), ZipStdoutWr(NULL),
  FLen(0), CurFPos(0), Bf(NULL), BfC(0), BfL(0) {
  // Opens the compressed file FNm for reading. The file must exist and must
  // carry the .zip extension; any other extension is reported and rejected.
  EAssertR(! FNm.Empty(), "Empty file-name.");
  EAssertR(TFile::Exists(FNm), TStr::Fmt("File %s does not exist", FNm.CStr()).CStr());

  // the uncompressed length is needed up front, so only .zip files are accepted
  const TStr FExt = FNm.GetFExt();
  if (FExt != ".zip") {
    printf("*** Error: file %s, compression format %s not supported\n", FNm.CStr(), FExt.CStr());
    EFailR(TStr::Fmt("File %s: compression format %s not supported", FNm.CStr(), FExt.CStr()).CStr());
  }

  FLen = TZipIn::GetFLen(FNm);
  // a zero length (malformed or empty file) leaves nothing to read:
  // no decompression process is started and no buffer is allocated
  if (FLen == 0) { return; }

#ifdef GLib_WIN
  // set up the pipe that will carry the child process's STDOUT
  SECURITY_ATTRIBUTES PipeAttr;
  PipeAttr.nLength = sizeof(SECURITY_ATTRIBUTES);
  PipeAttr.lpSecurityDescriptor = NULL;
  PipeAttr.bInheritHandle = TRUE;
  const int PipeBufferSz = 32*1024;
  EAssertR(CreatePipe(&ZipStdoutRd, &ZipStdoutWr, &PipeAttr, PipeBufferSz), "Stdout pipe creation failed");
  // the read end stays with this process and must not be inherited by the child
  SetHandleInformation(ZipStdoutRd, HANDLE_FLAG_INHERIT, 0);
#else
  // nothing to prepare here: CreateZipProcess() opens the pipe through popen()
#endif

  CreateZipProcess(GetCmd(FNm), FNm);
  Bf = new char[MxBfL];
  // FillBf() asserts BfC == BfL, so both are set to the same marker value
  BfC = -1;
  BfL = -1;
  FillBf();
}
TZipIn::TZipIn | ( | const TStr & | FNm, |
bool & | OpenedP | ||
) |
Definition at line 100 of file zipfl.cpp.
References Bf, BfC, BfL, CreateZipProcess(), EAssertR, TStr::Empty(), TFile::Exists(), FillBf(), FLen, GetCmd(), TStr::GetFExt(), GetFLen(), MxBfL, ZipStdoutRd, and ZipStdoutWr.
: TSBase(FNm.CStr()), TSIn(FNm), ZipStdoutRd(NULL), ZipStdoutWr(NULL),
  FLen(0), CurFPos(0), Bf(NULL), BfC(0), BfL(0) {
  // Opens the compressed file FNm for reading without failing when the file
  // is missing: OpenedP is set to whether FNm exists, and nothing else is done
  // for a missing file.
  EAssertR(! FNm.Empty(), "Empty file-name.");

  // check for existence first, so that no external process is started
  // (and no "corrupt file" message is logged) for a file that is not there
  OpenedP = TFile::Exists(FNm);
  if (! OpenedP) { return; }

  FLen = TZipIn::GetFLen(FNm);
  // same policy as the single-argument constructor: with a zero length there is
  // nothing to read, and calling FillBf() would fail its CurFPos < FLen check
  if (FLen == 0) { return; }

#ifdef GLib_WIN
  // set up the pipe that will carry the child process's STDOUT
  SECURITY_ATTRIBUTES saAttr;
  saAttr.nLength = sizeof(SECURITY_ATTRIBUTES);
  saAttr.bInheritHandle = TRUE;
  saAttr.lpSecurityDescriptor = NULL;
  EAssertR(CreatePipe(&ZipStdoutRd, &ZipStdoutWr, &saAttr, 0), "Stdout pipe creation failed");
  // the read end stays with this process and must not be inherited by the child
  SetHandleInformation(ZipStdoutRd, HANDLE_FLAG_INHERIT, 0);
#else
  // nothing to prepare here: CreateZipProcess() opens the pipe through popen()
#endif

  // GetCmd() extracts the extension itself, so it is given the file name
  CreateZipProcess(GetCmd(FNm), FNm);
  Bf = new char[MxBfL];
  // FillBf() asserts BfC == BfL, so both are set to the same marker value
  BfC = BfL = -1;
  FillBf();
}
TZipIn::~TZipIn | ( | ) |
Definition at line 132 of file zipfl.cpp.
References Bf, EAssertR, ZipStdoutRd, and ZipStdoutWr.
{
  // Releases the read buffer and closes the pipe to the decompression process.
  // All cleanup is carried out before any failure is reported, so a failed
  // close no longer leaks the buffer or the remaining handle.
  delete[] Bf;  // delete[] on NULL is a no-op
  Bf = NULL;

#ifdef GLib_WIN
  bool RdClosedP = true;
  bool WrClosedP = true;
  if (ZipStdoutRd != NULL) {
    RdClosedP = (CloseHandle(ZipStdoutRd) != 0);
    ZipStdoutRd = NULL;
  }
  if (ZipStdoutWr != NULL) {
    WrClosedP = (CloseHandle(ZipStdoutWr) != 0);
    ZipStdoutWr = NULL;
  }
  // NOTE(review): these asserts are issued from a destructor, as in the
  // original code; confirm that callers can tolerate that.
  EAssertR(RdClosedP, "Closing read-end of pipe failed");
  EAssertR(WrClosedP, "Closing write-end of pipe failed");
#else
  bool ProcClosedP = true;
  if (ZipStdoutRd != NULL) {
    ProcClosedP = (pclose(ZipStdoutRd) != -1);
    ZipStdoutRd = NULL;
  }
  // NOTE(review): this assert is issued from a destructor, as in the
  // original code; confirm that callers can tolerate that.
  EAssertR(ProcClosedP, "Closing of the process failed");
#endif
}
void TZipIn::CreateZipProcess | ( | const TStr & | Cmd, |
const TStr & | ZipFNm | ||
) | [private] |
Definition at line 18 of file zipfl.cpp.
References TStr::CStr(), EAssertR, TStr::Fmt(), ZipStdoutRd, and ZipStdoutWr.
Referenced by TZipIn().
{
  // Starts the external decompression command Cmd on the file ZipFNm so that
  // its standard output can be read through ZipStdoutRd.
  const TStr CmdLine = TStr::Fmt("%s %s", Cmd.CStr(), ZipFNm.CStr());
#ifdef GLib_WIN
  STARTUPINFO StartInfo;
  PROCESS_INFORMATION ProcInfo;
  ZeroMemory(&StartInfo, sizeof(STARTUPINFO));
  ZeroMemory(&ProcInfo, sizeof(PROCESS_INFORMATION));
  StartInfo.cb = sizeof(STARTUPINFO);
  // the child writes its STDOUT into the write end of the pipe
  StartInfo.dwFlags |= STARTF_USESTDHANDLES;
  StartInfo.hStdOutput = ZipStdoutWr;
  const BOOL StartedP = CreateProcess(
    NULL,                      // application name: taken from the command line
    (LPSTR) CmdLine.CStr(),    // command line
    NULL,                      // process security attributes
    NULL,                      // primary thread security attributes
    TRUE,                      // handles are inherited
    0,                         // creation flags
    NULL,                      // use parent's environment
    NULL,                      // use parent's current directory
    &StartInfo,                // STARTUPINFO pointer
    &ProcInfo);                // receives PROCESS_INFORMATION
  EAssertR(StartedP!=0, TStr::Fmt("Can not execute '%s'", CmdLine.CStr()).CStr());
  // the process and thread handles are not used any further
  CloseHandle(ProcInfo.hProcess);
  CloseHandle(ProcInfo.hThread);
#else
  ZipStdoutRd = popen(CmdLine.CStr(), "r");
  if (ZipStdoutRd == NULL) {
    // second attempt: prefix the command with SevenZipPath
    const TStr PathCmdLine = TZipIn::SevenZipPath+"/"+CmdLine;
    ZipStdoutRd = popen(PathCmdLine.CStr(), "r");
  }
  EAssertR(ZipStdoutRd != NULL, TStr::Fmt("Can not execute '%s'", CmdLine.CStr()).CStr());
#endif
}
bool TZipIn::Eof | ( | ) | [inline, virtual] |
void TZipIn::FillBf | ( | ) | [private] |
Definition at line 51 of file zipfl.cpp.
References Bf, BfC, BfL, CurFPos, EAssert, EAssertR, FLen, TSBase::GetSNm(), MxBfL, and ZipStdoutRd.
Referenced by FindEol(), GetBf(), GetCh(), PeekCh(), and TZipIn().
{
  // Reads the next chunk (at most MxBfL bytes) of decompressed data from the
  // pipe into Bf, advances CurFPos by the number of bytes read and rewinds BfC.
  EAssertR(CurFPos < FLen, "End of file "+GetSNm()+" reached.");
  // a refill is only legal once the previous buffer is used up
  EAssertR(BfC == BfL, "Error reading file '"+GetSNm()+"'.");
#ifdef GLib_WIN
  DWORD BytesRead;
  EAssert(ReadFile(ZipStdoutRd, Bf, MxBfL, &BytesRead, NULL) != 0);
#else
  const size_t BytesRead = fread(Bf, 1, MxBfL, ZipStdoutRd);
  EAssert(BytesRead != 0);
#endif
  CurFPos += BytesRead;
  BfL = (int) BytesRead;
  EAssertR(!((BfC == 0) && (BfL == 0)), "Error reading file '"+GetSNm()+"'.");
  BfC = 0;
}
void TZipIn::FillFExtToCmdH | ( | ) | [static, private] |
Definition at line 206 of file zipfl.cpp.
References THash< TKey, TDat, THashFunc >::AddDat(), THash< TKey, TDat, THashFunc >::Empty(), and FExtToCmdH.
Referenced by GetCmd(), and IsZipExt().
{
  // Populates FExtToCmdH, the table from file extension to decompression
  // command, unless it already has entries.
  // 7-Zip options used: "e -y -bd -so"
#ifdef GLib_WIN
  const char* ZipCmd = "7z.exe e -y -bd -so";
#else
  const char* ZipCmd = "7za e -y -bd -so";
#endif
  if (! FExtToCmdH.Empty()) { return; }

  // every listed extension maps to the same command
  const char* const FExtV[] = {
    ".gz", ".7z", ".rar", ".zip", ".cab", ".arj", ".bzip2", ".bz2"
  };
  const int FExts = (int) (sizeof(FExtV) / sizeof(FExtV[0]));
  for (int FExtN = 0; FExtN < FExts; FExtN++) {
    FExtToCmdH.AddDat(FExtV[FExtN], ZipCmd);
  }
}
int TZipIn::FindEol | ( | int & | BfN | ) | [private] |
Definition at line 185 of file zipfl.cpp.
References Bf, BfC, BfL, Eof(), and FillBf().
Referenced by GetNextLnBf().
{
  // Scans Bf from BfC towards BfL for an end of line ("\n" or "\r\n").
  // Returns  1: end of line found; BfN is the index of its first character
  //             and BfC is moved past the whole line terminator,
  //          0: buffer exhausted without an end of line; BfN == BfC == BfL,
  //         -1: end of input, nothing was scanned (BfN is not set).
  if (BfC >= BfL) {
    // buffer used up: stop at end of input, otherwise load more data
    if (Eof()) { return -1; }
    FillBf();
  }
  while (BfC < BfL) {
    const char Ch = Bf[BfC++];
    if (Ch == '\n') {
      BfN = BfC-1;
      return 1;
    }
    // BfC already indexes the character that follows '\r', so that is the one
    // to test; it must also lie inside the valid part of the buffer.
    // A "\r\n" pair split across two buffer fills is not recognised here: the
    // '\r' is then returned as part of the line.
    if (Ch == '\r' && BfC < BfL && Bf[BfC] == '\n') {
      BfC++;
      BfN = BfC-2;
      return 1;
    }
  }
  BfN = BfC;
  return 0;
}
int TZipIn::GetBf | ( | const void * | LBf, |
const TSize & | LBfL | ||
) | [virtual] |
Implements TSIn.
Definition at line 145 of file zipfl.cpp.
References Bf, BfC, BfL, and FillBf().
{
  // Copies LBfL bytes from the stream into LBf, refilling the internal buffer
  // when it runs out, and returns the sum of the copied characters.
  char* const DstBf = (char*) LBf;
  int LBfS = 0;
  // true when the request does not fit into what is left of the buffer
  const bool RefillP = TSize(BfC+LBfL) > TSize(BfL);
  if (RefillP) {
    for (TSize LBfC = 0; LBfC < LBfL; LBfC++) {
      if (BfC == BfL) { FillBf(); }
      const char Ch = Bf[BfC++];
      DstBf[LBfC] = Ch;
      LBfS += Ch;
    }
  } else {
    // the whole request is served from the current buffer
    for (TSize LBfC = 0; LBfC < LBfL; LBfC++) {
      const char Ch = Bf[BfC++];
      DstBf[LBfC] = Ch;
      LBfS += Ch;
    }
  }
  return LBfS;
}
char TZipIn::GetCh | ( | ) | [inline, virtual] |
TStr TZipIn::GetCmd | ( | const TStr & | ZipFNm | ) | [static] |
Return a command-line string that is executed in order to decompress a file to standard output.
Definition at line 225 of file zipfl.cpp.
References TStr::CStr(), EAssertR, THash< TKey, TDat, THashFunc >::Empty(), FExtToCmdH, FillFExtToCmdH(), TStr::Fmt(), THash< TKey, TDat, THashFunc >::GetDat(), TStr::GetFExt(), TStr::GetLc(), and THash< TKey, TDat, THashFunc >::IsKey().
Referenced by TZipIn().
{
  // Looks up the decompression command registered for the lower-cased
  // extension of ZipFNm; an unregistered extension fails the assertion.
  if (FExtToCmdH.Empty()) {
    FillFExtToCmdH();
  }
  const TStr LcFExt = ZipFNm.GetFExt().GetLc();
  EAssertR(FExtToCmdH.IsKey(LcFExt), TStr::Fmt("Unsupported file extension '%s'", LcFExt.CStr()));
  return FExtToCmdH.GetDat(LcFExt);
}
uint64 TZipIn::GetCurFPos | ( | ) | const [inline] |
uint64 TZipIn::GetFLen | ( | ) | const [inline] |
uint64 TZipIn::GetFLen | ( | const TStr & | ZipFNm | ) | [static] |
Return the uncompressed size (in bytes) of the compressed file ZipFNm.
Definition at line 232 of file zipfl.cpp.
References Bf, BfC, BfL, TStr::CStr(), EAssert, EAssertR, TStr::Fmt(), IAssert, TVec< TVal, TSizeTy >::Len(), MxBfL, SaveToErrLog(), TStr::SplitOnWs(), WrNotify(), ZipStdoutRd, and ZipStdoutWr.
{
  // Runs the 7-Zip list command ("l") on ZipFNm and extracts the uncompressed
  // size from its output. Returns 0, after notifying and logging, when the
  // output does not have the expected layout.
#ifdef GLib_WIN
  HANDLE ZipStdoutRd, ZipStdoutWr;
  // create the pipe that carries the child process's STDOUT
  SECURITY_ATTRIBUTES saAttr;
  saAttr.nLength = sizeof(SECURITY_ATTRIBUTES);
  saAttr.bInheritHandle = TRUE;
  saAttr.lpSecurityDescriptor = NULL;
  const int PipeBufferSz = 32*1024;
  EAssertR(CreatePipe(&ZipStdoutRd, &ZipStdoutWr, &saAttr, PipeBufferSz), "Stdout pipe creation failed");
  // the read end stays with this process and must not be inherited by the child
  SetHandleInformation(ZipStdoutRd, HANDLE_FLAG_INHERIT, 0);
  {
    const TStr CmdLine = TStr::Fmt("7z.exe l %s", ZipFNm.CStr());
    PROCESS_INFORMATION piProcInfo;
    STARTUPINFO siStartInfo;
    ZeroMemory( &piProcInfo, sizeof(PROCESS_INFORMATION));
    ZeroMemory( &siStartInfo, sizeof(STARTUPINFO));
    siStartInfo.cb = sizeof(STARTUPINFO);
    siStartInfo.hStdOutput = ZipStdoutWr;
    siStartInfo.dwFlags |= STARTF_USESTDHANDLES;
    // create the child process
    const BOOL FuncRetn = CreateProcess(NULL, (LPSTR) CmdLine.CStr(),
      NULL, NULL, TRUE, 0, NULL, NULL, &siStartInfo, &piProcInfo);
    if (FuncRetn == 0) {
      // do not leak the pipe when the process could not be started
      CloseHandle(ZipStdoutWr);
      CloseHandle(ZipStdoutRd);
    }
    EAssertR(FuncRetn!=0, TStr::Fmt("Can not execute '%s'", CmdLine.CStr()).CStr());
    CloseHandle(piProcInfo.hProcess);
    CloseHandle(piProcInfo.hThread);
  }
#else
  const TStr CmdLine = TStr::Fmt("7za l %s", ZipFNm.CStr());
  FILE* ZipStdoutRd = popen(CmdLine.CStr(), "r");
  if (ZipStdoutRd == NULL) { // try using SevenZipPath
    ZipStdoutRd = popen((TZipIn::SevenZipPath+"/"+CmdLine).CStr(), "r");
  }
  EAssertR(ZipStdoutRd != NULL, TStr::Fmt("Can not execute '%s'", CmdLine.CStr()).CStr());
#endif
  // Read output from the child process. At most BfSz-1 bytes are requested so
  // that the terminating zero written below always stays inside the buffer.
  const int BfSz = 32*1024;
  char* Bf = new char [BfSz];
  memset(Bf, 0, BfSz);
#ifdef GLib_WIN
  DWORD BytesRead = 0;
  const bool ReadP = ReadFile(ZipStdoutRd, Bf, BfSz-1, &BytesRead, NULL) != 0;
  // both pipe ends are local to this function and are released here
  CloseHandle(ZipStdoutWr);
  CloseHandle(ZipStdoutRd);
  const bool ClosedP = true;
#else
  const size_t BytesRead = fread(Bf, 1, BfSz-1, ZipStdoutRd);
  const bool ReadP = (BytesRead != 0);
  const bool ClosedP = (pclose(ZipStdoutRd) != -1);
#endif
  const int BfL = (int) BytesRead;
  Bf[BfL] = 0;
  TStr Str(Bf);
  // the buffer is released before the checks, so a failed check cannot leak it
  delete [] Bf;
  EAssert(ReadP);
  EAssert(ClosedP);
  IAssert(BfL != 0);
  // find file length: search backwards for the last token starting with
  // "-----" and take the token seven positions before it
  TStrV StrV;
  Str.SplitOnWs(StrV);
  int n = StrV.Len()-1;
  while (n > 0 && ! StrV[n].IsPrefix("-----")) { n--; }
  if (n-7 <= 0) {
    WrNotify(TStr::Fmt("Corrupt file %s: MESSAGE:\n", ZipFNm.CStr()).CStr(), Str.CStr());
    SaveToErrLog(TStr::Fmt("Corrupt file %s. Message:\n:%s\n", ZipFNm.CStr(), Str.CStr()).CStr());
    return 0;
  }
  return StrV[n-7].GetInt64();
}
bool TZipIn::GetNextLnBf | ( | TChA & | LnChA | ) | [virtual] |
Implements TSIn.
Definition at line 161 of file zipfl.cpp.
References TChA::AddBf(), Bf, BfC, BfL, TChA::Clr(), TChA::Empty(), and FindEol().
{
  // Reads the next line into LnChA, without its line terminator. Returns true
  // when a line was read; false when the input ended with nothing collected.
  LnChA.Clr();
  for (;;) {
    // start of the piece to copy: FindEol() loads a fresh buffer, which then
    // starts at index 0, whenever the current one is used up
    const int LnStart = (BfC < BfL) ? BfC : 0;
    int LnEnd;  // set by FindEol() for every non-negative status
    const int Status = FindEol(LnEnd);
    if (Status < 0) { break; }  // end of input
    LnChA.AddBf(&Bf[LnStart], LnEnd-LnStart);
    if (Status == 1) { return true; }  // got a complete line
    if (Status != 0) { break; }
    // Status == 0: the line continues in the next buffer
  }
  // end of input: a last line without a newline still counts
  return !LnChA.Empty();
}
bool TZipIn::IsZipExt | ( | const TStr & | FNmExt | ) | [static] |
Check whether the file extension FNmExt is that of a compressed file (.gz, .7z, .rar, .zip, .cab, .arj, .bzip2).
Definition at line 201 of file zipfl.cpp.
References THash< TKey, TDat, THashFunc >::Empty(), FExtToCmdH, FillFExtToCmdH(), and THash< TKey, TDat, THashFunc >::IsKey().
Referenced by IsZipFNm(), and TSsParser::TSsParser().
{
  // True when FNmExt is a key of the extension-to-command table; the table is
  // filled first if it has no entries yet. The lookup uses FNmExt as given.
  if (FExtToCmdH.Empty()) {
    FillFExtToCmdH();
  }
  const bool KnownExtP = FExtToCmdH.IsKey(FNmExt);
  return KnownExtP;
}
static bool TZipIn::IsZipFNm | ( | const TStr & | FNm | ) | [inline, static] |
Check whether the file extension of FNm is that of a compressed file (.gz, .7z, .rar, .zip, .cab, .arj, .bzip2).
Definition at line 56 of file zipfl.h.
References TStr::GetFExt(), and IsZipExt().
Referenced by TSnap::LoadDyNet(), TSnap::LoadDyNetGraphV(), and NewIfZip().
int TZipIn::Len | ( | ) | const [inline, virtual] |
PSIn TZipIn::New | ( | const TStr & | FNm | ) | [static] |
Definition at line 124 of file zipfl.cpp.
References TZipIn().
Referenced by TSnap::LoadDyNet(), TSnap::LoadDyNetGraphV(), and NewIfZip().
PSIn TZipIn::New | ( | const TStr & | FNm, |
bool & | OpenedP | ||
) | [static] |
static PSIn TZipIn::NewIfZip | ( | const TStr & | FNm | ) | [inline, static] |
char TZipIn::PeekCh | ( | ) | [inline, virtual] |
char* TZipIn::Bf [private] |
int TZipIn::BfC [private] |
int TZipIn::BfL [private] |
uint64 TZipIn::CurFPos [private] |
TStrStrH TZipIn::FExtToCmdH [static, private] |
Definition at line 19 of file zipfl.h.
Referenced by FillFExtToCmdH(), GetCmd(), and IsZipExt().
uint64 TZipIn::FLen [private] |
const int TZipIn::MxBfL = 32*1024 [static, private] |
TStr TZipIn::SevenZipPath = "/usr/bin" [static] |
FILE* TZipIn::ZipStdoutRd [private] |
FILE * TZipIn::ZipStdoutWr [private] |