SNAP Library 2.2, User Reference
2014-03-11 19:15:55
SNAP, a general purpose, high performance system for analysis and manipulation of large networks
|
00001 00002 // ZIP Input-File 00003 00004 #if defined(GLib_WIN) 00005 TStr TZipIn::SevenZipPath = "C:\\7Zip"; 00006 #elif defined(GLib_CYGWIN) 00007 TStr TZipIn::SevenZipPath = "/usr/bin"; 00008 #elif defined(GLib_MACOSX) 00009 TStr TZipIn::SevenZipPath = "/opt/local/bin"; 00010 #else 00011 TStr TZipIn::SevenZipPath = "/usr/bin"; 00012 #endif 00013 00014 00015 TStrStrH TZipIn::FExtToCmdH; 00016 const int TZipIn::MxBfL=32*1024; 00017 00018 void TZipIn::CreateZipProcess(const TStr& Cmd, const TStr& ZipFNm) { 00019 const TStr CmdLine = TStr::Fmt("%s %s", Cmd.CStr(), ZipFNm.CStr()); 00020 #ifdef GLib_WIN 00021 PROCESS_INFORMATION piProcInfo; 00022 STARTUPINFO siStartInfo; 00023 ZeroMemory( &piProcInfo, sizeof(PROCESS_INFORMATION)); 00024 ZeroMemory( &siStartInfo, sizeof(STARTUPINFO)); 00025 siStartInfo.cb = sizeof(STARTUPINFO); 00026 siStartInfo.hStdOutput = ZipStdoutWr; 00027 siStartInfo.dwFlags |= STARTF_USESTDHANDLES; 00028 // Create the child process. 00029 const BOOL FuncRetn = CreateProcess(NULL, 00030 (LPSTR) CmdLine.CStr(), // command line 00031 NULL, // process security attributes 00032 NULL, // primary thread security attributes 00033 TRUE, // handles are inherited 00034 0, // creation flags 00035 NULL, // use parent's environment 00036 NULL, // use parent's current directory 00037 &siStartInfo, // STARTUPINFO pointer 00038 &piProcInfo); // receives PROCESS_INFORMATION 00039 EAssertR(FuncRetn!=0, TStr::Fmt("Can not execute '%s'", CmdLine.CStr()).CStr()); 00040 CloseHandle(piProcInfo.hProcess); 00041 CloseHandle(piProcInfo.hThread); 00042 #else 00043 ZipStdoutRd = popen(CmdLine.CStr(), "r"); 00044 if (ZipStdoutRd == 0) { // try using SevenZipPath 00045 ZipStdoutRd = popen((TZipIn::SevenZipPath+"/"+CmdLine).CStr(), "r"); 00046 } 00047 EAssertR(ZipStdoutRd != NULL, TStr::Fmt("Can not execute '%s'", CmdLine.CStr()).CStr()); 00048 #endif 00049 } 00050 00051 void TZipIn::FillBf(){ 00052 EAssertR(CurFPos < FLen, "End of file "+GetSNm()+" reached."); 00053 EAssertR((BfC==BfL)/*&&((BfL==-1)||(BfL==MxBfL))*/, "Error reading file '"+GetSNm()+"'."); 00054 #ifdef GLib_WIN 00055 // Read output from the child process 00056 DWORD BytesRead; 00057 EAssert(ReadFile(ZipStdoutRd, Bf, MxBfL, &BytesRead, NULL) != 0); 00058 #else 00059 size_t BytesRead = fread(Bf, 1, MxBfL, ZipStdoutRd); 00060 EAssert(BytesRead != 0); 00061 #endif 00062 BfL = (int) BytesRead; 00063 CurFPos += BytesRead; 00064 EAssertR((BfC!=0)||(BfL!=0), "Error reading file '"+GetSNm()+"'."); 00065 BfC = 0; 00066 } 00067 00068 TZipIn::TZipIn(const TStr& FNm) : TSBase(FNm.CStr()), TSIn(FNm), ZipStdoutRd(NULL), ZipStdoutWr(NULL), 00069 FLen(0), CurFPos(0), Bf(NULL), BfC(0), BfL(0) { 00070 EAssertR(! FNm.Empty(), "Empty file-name."); 00071 EAssertR(TFile::Exists(FNm), TStr::Fmt("File %s does not exist", FNm.CStr()).CStr()); 00072 FLen = 0; 00073 // non-zip files not supported, need uncompressed file length information 00074 if (FNm.GetFExt() != ".zip") { 00075 printf("*** Error: file %s, compression format %s not supported\n", FNm.CStr(), FNm.GetFExt().CStr()); 00076 EFailR(TStr::Fmt("File %s: compression format %s not supported", FNm.CStr(), FNm.GetFExt().CStr()).CStr()); 00077 } 00078 FLen = TZipIn::GetFLen(FNm); 00079 // return for malformed files 00080 if (FLen == 0) { return; } // empty file 00081 #ifdef GLib_WIN 00082 // create pipes 00083 SECURITY_ATTRIBUTES saAttr; 00084 saAttr.nLength = sizeof(SECURITY_ATTRIBUTES); 00085 saAttr.bInheritHandle = TRUE; 00086 saAttr.lpSecurityDescriptor = NULL; 00087 // Create a pipe for the child process's STDOUT. 00088 const int PipeBufferSz = 32*1024; 00089 EAssertR(CreatePipe(&ZipStdoutRd, &ZipStdoutWr, &saAttr, PipeBufferSz), "Stdout pipe creation failed"); 00090 // Ensure the read handle to the pipe for STDOUT is not inherited. 00091 SetHandleInformation(ZipStdoutRd, HANDLE_FLAG_INHERIT, 0); 00092 #else 00093 // no implementation needed 00094 #endif 00095 CreateZipProcess(GetCmd(FNm), FNm); 00096 Bf = new char[MxBfL]; BfC = BfL=-1; 00097 FillBf(); 00098 } 00099 00100 TZipIn::TZipIn(const TStr& FNm, bool& OpenedP) : TSBase(FNm.CStr()), TSIn(FNm), ZipStdoutRd(NULL), ZipStdoutWr(NULL), 00101 FLen(0), CurFPos(0), Bf(NULL), BfC(0), BfL(0) { 00102 EAssertR(! FNm.Empty(), "Empty file-name."); 00103 FLen = TZipIn::GetFLen(FNm); 00104 OpenedP = TFile::Exists(FNm); 00105 if (OpenedP) { 00106 #ifdef GLib_WIN 00107 SECURITY_ATTRIBUTES saAttr; 00108 saAttr.nLength = sizeof(SECURITY_ATTRIBUTES); 00109 saAttr.bInheritHandle = TRUE; 00110 saAttr.lpSecurityDescriptor = NULL; 00111 // Create a pipe for the child process's STDOUT. 00112 EAssertR(CreatePipe(&ZipStdoutRd, &ZipStdoutWr, &saAttr, 0), "Stdout pipe creation failed"); 00113 // Ensure the read handle to the pipe for STDOUT is not inherited. 00114 SetHandleInformation(ZipStdoutRd, HANDLE_FLAG_INHERIT, 0); 00115 #else 00116 // no implementation needed 00117 #endif 00118 CreateZipProcess(GetCmd(FNm.GetFExt()), FNm); 00119 Bf = new char[MxBfL]; BfC = BfL=-1; 00120 FillBf(); 00121 } 00122 } 00123 00124 PSIn TZipIn::New(const TStr& FNm) { 00125 return PSIn(new TZipIn(FNm)); 00126 } 00127 00128 PSIn TZipIn::New(const TStr& FNm, bool& OpenedP){ 00129 return PSIn(new TZipIn(FNm, OpenedP)); 00130 } 00131 00132 TZipIn::~TZipIn(){ 00133 #ifdef GLib_WIN 00134 if (ZipStdoutRd != NULL) { 00135 EAssertR(CloseHandle(ZipStdoutRd), "Closing read-end of pipe failed"); } 00136 if (ZipStdoutWr != NULL) { 00137 EAssertR(CloseHandle(ZipStdoutWr)!=0, "Closing write-end of pipe failed"); } 00138 #else 00139 if (ZipStdoutRd != NULL) { 00140 EAssertR(pclose(ZipStdoutRd) != -1, "Closing of the process failed"); } 00141 #endif 00142 if (Bf != NULL) { delete[] Bf; } 00143 } 00144 00145 int TZipIn::GetBf(const void* LBf, const TSize& LBfL){ 00146 int LBfS=0; 00147 if (TSize(BfC+LBfL)>TSize(BfL)){ 00148 for (TSize LBfC=0; LBfC<LBfL; LBfC++){ 00149 if (BfC==BfL){FillBf();} 00150 LBfS+=((char*)LBf)[LBfC]=Bf[BfC++];} 00151 } else { 00152 for (TSize LBfC=0; LBfC<LBfL; LBfC++){ 00153 LBfS+=(((char*)LBf)[LBfC]=Bf[BfC++]);} 00154 } 00155 return LBfS; 00156 } 00157 00158 // Gets the next line to LnChA. 00159 // Returns true, if LnChA contains a valid line. 00160 // Returns false, if LnChA is empty, such as end of file was encountered. 00161 bool TZipIn::GetNextLnBf(TChA& LnChA) { 00162 int Status; 00163 int BfN; // new pointer to the end of line 00164 int BfP; // previous pointer to the line start 00165 LnChA.Clr(); 00166 do { 00167 if (BfC >= BfL) { BfP = 0; } // reset the current pointer, FindEol() will read a new buffer 00168 else { BfP = BfC; } 00169 Status = FindEol(BfN); 00170 if (Status >= 0) { 00171 LnChA.AddBf(&Bf[BfP],BfN-BfP); 00172 if (Status == 1) { return true; } // got a complete line 00173 } 00174 // get more data, if the line is incomplete 00175 } while (Status == 0); 00176 // eof or the last line has no newline 00177 return !LnChA.Empty(); 00178 } 00179 00180 // Sets BfN to the end of line or end of buffer. Reads more data, if needed. 00181 // Returns 1, when an end of line was found, BfN is end of line. 00182 // Returns 0, when an end of line was not found and more data is required, 00183 // BfN is end of buffer. 00184 // Returns -1, when an end of file was found, BfN is not defined. 00185 int TZipIn::FindEol(int& BfN) { 00186 char Ch; 00187 if (BfC >= BfL) { // check for eof, read more data 00188 if (Eof()) { return -1; } 00189 FillBf(); 00190 } 00191 while (BfC < BfL) { 00192 Ch = Bf[BfC++]; 00193 if (Ch=='\n') { BfN = BfC-1; return 1; } 00194 if (Ch=='\r' && Bf[BfC+1]=='\n') { 00195 BfC++; BfN = BfC-2; return 1; } 00196 } 00197 BfN = BfC; 00198 return 0; 00199 } 00200 00201 bool TZipIn::IsZipExt(const TStr& FNmExt) { 00202 if (FExtToCmdH.Empty()) FillFExtToCmdH(); 00203 return FExtToCmdH.IsKey(FNmExt); 00204 } 00205 00206 void TZipIn::FillFExtToCmdH() { 00207 // 7za decompress: "e -y -bd -so"; 00208 #ifdef GLib_WIN 00209 const char* ZipCmd = "7z.exe e -y -bd -so"; 00210 #else 00211 const char* ZipCmd = "7za e -y -bd -so"; 00212 #endif 00213 if (FExtToCmdH.Empty()) { 00214 FExtToCmdH.AddDat(".gz", ZipCmd); 00215 FExtToCmdH.AddDat(".7z", ZipCmd); 00216 FExtToCmdH.AddDat(".rar", ZipCmd); 00217 FExtToCmdH.AddDat(".zip", ZipCmd); 00218 FExtToCmdH.AddDat(".cab", ZipCmd); 00219 FExtToCmdH.AddDat(".arj", ZipCmd); 00220 FExtToCmdH.AddDat(".bzip2", ZipCmd); 00221 FExtToCmdH.AddDat(".bz2", ZipCmd); 00222 } 00223 } 00224 00225 TStr TZipIn::GetCmd(const TStr& ZipFNm) { 00226 if (FExtToCmdH.Empty()) FillFExtToCmdH(); 00227 const TStr Ext = ZipFNm.GetFExt().GetLc(); 00228 EAssertR(FExtToCmdH.IsKey(Ext), TStr::Fmt("Unsupported file extension '%s'", Ext.CStr())); 00229 return FExtToCmdH.GetDat(Ext); 00230 } 00231 00232 uint64 TZipIn::GetFLen(const TStr& ZipFNm) { 00233 #ifdef GLib_WIN 00234 HANDLE ZipStdoutRd, ZipStdoutWr; 00235 // create pipes 00236 SECURITY_ATTRIBUTES saAttr; 00237 saAttr.nLength = sizeof(SECURITY_ATTRIBUTES); 00238 saAttr.bInheritHandle = TRUE; 00239 saAttr.lpSecurityDescriptor = NULL; 00240 // Create a pipe for the child process's STDOUT. 00241 const int PipeBufferSz = 32*1024; 00242 EAssertR(CreatePipe(&ZipStdoutRd, &ZipStdoutWr, &saAttr, PipeBufferSz), "Stdout pipe creation failed"); 00243 // Ensure the read handle to the pipe for STDOUT is not inherited. 00244 SetHandleInformation(ZipStdoutRd, HANDLE_FLAG_INHERIT, 0); 00245 //CreateZipProcess(GetCmd(FNm), FNm); 00246 { const TStr CmdLine = TStr::Fmt("7z.exe l %s", ZipFNm.CStr()); 00247 PROCESS_INFORMATION piProcInfo; 00248 STARTUPINFO siStartInfo; 00249 ZeroMemory( &piProcInfo, sizeof(PROCESS_INFORMATION)); 00250 ZeroMemory( &siStartInfo, sizeof(STARTUPINFO)); 00251 siStartInfo.cb = sizeof(STARTUPINFO); 00252 siStartInfo.hStdOutput = ZipStdoutWr; 00253 siStartInfo.dwFlags |= STARTF_USESTDHANDLES; 00254 // Create the child process. 00255 const BOOL FuncRetn = CreateProcess(NULL, (LPSTR) CmdLine.CStr(), 00256 NULL, NULL, TRUE, 0, NULL, NULL, &siStartInfo, &piProcInfo); 00257 EAssertR(FuncRetn!=0, TStr::Fmt("Can not execute '%s'", CmdLine.CStr()).CStr()); 00258 CloseHandle(piProcInfo.hProcess); 00259 CloseHandle(piProcInfo.hThread); } 00260 #else 00261 const TStr CmdLine = TStr::Fmt("7za l %s", ZipFNm.CStr()); 00262 FILE* ZipStdoutRd = popen(CmdLine.CStr(), "r"); 00263 if (ZipStdoutRd == NULL) { // try using SevenZipPath 00264 ZipStdoutRd = popen((TZipIn::SevenZipPath+"/"+CmdLine).CStr(), "r"); 00265 } 00266 EAssertR(ZipStdoutRd != NULL, TStr::Fmt("Can not execute '%s'", CmdLine.CStr()).CStr()); 00267 #endif 00268 // Read output from the child process 00269 const int BfSz = 32*1024; 00270 char* Bf = new char [BfSz]; 00271 int BfC=0, BfL=0; 00272 memset(Bf, 0, BfSz); 00273 #ifdef GLib_WIN 00274 DWORD BytesRead; 00275 EAssert(ReadFile(ZipStdoutRd, Bf, MxBfL, &BytesRead, NULL) != 0); 00276 #else 00277 size_t BytesRead = fread(Bf, 1, MxBfL, ZipStdoutRd); 00278 EAssert(BytesRead != 0); 00279 EAssert(pclose(ZipStdoutRd) != -1); 00280 #endif 00281 BfL = (int) BytesRead; IAssert((BfC!=0)||(BfL!=0)); 00282 BfC = 0; Bf[BfL] = 0; 00283 // find file lenght 00284 TStr Str(Bf); delete [] Bf; 00285 TStrV StrV; Str.SplitOnWs(StrV); 00286 int n = StrV.Len()-1; 00287 while (n > 0 && ! StrV[n].IsPrefix("-----")) { n--; } 00288 if (n-7 <= 0) { 00289 WrNotify(TStr::Fmt("Corrupt file %s: MESSAGE:\n", ZipFNm.CStr()).CStr(), Str.CStr()); 00290 SaveToErrLog(TStr::Fmt("Corrupt file %s. Message:\n:%s\n", ZipFNm.CStr(), Str.CStr()).CStr()); 00291 return 0; 00292 } 00293 return StrV[n-7].GetInt64(); 00294 } 00295 00297 // Output-File 00298 TStrStrH TZipOut::FExtToCmdH; 00299 const TSize TZipOut::MxBfL=4*1024; 00300 00301 void TZipOut::FlushBf() { 00302 #ifdef GLib_WIN 00303 DWORD BytesOut; 00304 EAssertR(WriteFile(ZipStdinWr, Bf, DWORD(BfL), &BytesOut, NULL)!=0, "Error writting to the file '"+GetSNm()+"'."); 00305 #else 00306 size_t BytesOut = fwrite(Bf, 1, BfL, ZipStdinWr); 00307 #endif 00308 EAssert(BytesOut == BfL); 00309 BfL = 0; 00310 } 00311 00312 void TZipOut::CreateZipProcess(const TStr& Cmd, const TStr& ZipFNm) { 00313 const TStr CmdLine = TStr::Fmt("%s %s", Cmd.CStr(), ZipFNm.CStr()); 00314 #ifdef GLib_WIN 00315 PROCESS_INFORMATION piProcInfo; 00316 STARTUPINFO siStartInfo; 00317 ZeroMemory( &piProcInfo, sizeof(PROCESS_INFORMATION)); 00318 ZeroMemory( &siStartInfo, sizeof(STARTUPINFO)); 00319 siStartInfo.cb = sizeof(STARTUPINFO); 00320 siStartInfo.hStdInput = ZipStdinRd; 00321 siStartInfo.dwFlags |= STARTF_USESTDHANDLES; 00322 // Create the child process. 00323 const BOOL FuncRetn = CreateProcess(NULL, 00324 (LPSTR) CmdLine.CStr(), // command line 00325 NULL, // process security attributes 00326 NULL, // primary thread security attributes 00327 TRUE, // handles are inherited 00328 0, // creation flags 00329 NULL, // use parent's environment 00330 NULL, // use parent's current directory 00331 &siStartInfo, // STARTUPINFO pointer 00332 &piProcInfo); // receives PROCESS_INFORMATION 00333 EAssertR(FuncRetn!=0, TStr::Fmt("Can not execute '%s'", CmdLine.CStr()).CStr()); 00334 CloseHandle(piProcInfo.hProcess); 00335 CloseHandle(piProcInfo.hThread); 00336 #else 00337 ZipStdinWr = popen(CmdLine.CStr(),"w"); 00338 if (ZipStdinWr == NULL) { // try using SevenZipPath 00339 ZipStdinWr = popen((TZipIn::SevenZipPath+"/"+CmdLine).CStr(), "r"); 00340 } 00341 EAssertR(ZipStdinWr != NULL, TStr::Fmt("Can not execute '%s'", CmdLine.CStr()).CStr()); 00342 #endif 00343 } 00344 00345 TZipOut::TZipOut(const TStr& FNm) : TSBase(FNm.CStr()), TSOut(FNm), ZipStdinRd(NULL), ZipStdinWr(NULL), Bf(NULL), BfL(0){ 00346 EAssertR(! FNm.Empty(), "Empty file-name."); 00347 #ifdef GLib_WIN 00348 // create pipes 00349 SECURITY_ATTRIBUTES saAttr; 00350 saAttr.nLength = sizeof(SECURITY_ATTRIBUTES); 00351 saAttr.bInheritHandle = TRUE; 00352 saAttr.lpSecurityDescriptor = NULL; 00353 // Create a pipe for the child process's STDOUT. 00354 EAssertR(CreatePipe(&ZipStdinRd, &ZipStdinWr, &saAttr, 0), "Stdout pipe creation failed"); 00355 // Ensure the read handle to the pipe for STDOUT is not inherited. 00356 SetHandleInformation(ZipStdinWr, HANDLE_FLAG_INHERIT, 0); 00357 #else 00358 // no implementation necessary 00359 #endif 00360 CreateZipProcess(GetCmd(FNm), FNm); 00361 Bf=new char[MxBfL]; BfL=0; 00362 } 00363 00364 PSOut TZipOut::New(const TStr& FNm){ 00365 return PSOut(new TZipOut(FNm)); 00366 } 00367 00368 TZipOut::~TZipOut() { 00369 if (BfL!=0) { FlushBf(); } 00370 #ifdef GLib_WIN 00371 if (ZipStdinWr != NULL) { EAssertR(CloseHandle(ZipStdinWr), "Closing write-end of pipe failed"); } 00372 if (ZipStdinRd != NULL) { EAssertR(CloseHandle(ZipStdinRd), "Closing read-end of pipe failed"); } 00373 #else 00374 if (ZipStdinWr != NULL) { EAssertR(pclose(ZipStdinWr) != -1, "Closing of the process failed"); } 00375 #endif 00376 if (Bf!=NULL) { delete[] Bf; } 00377 } 00378 00379 int TZipOut::PutCh(const char& Ch){ 00380 if (BfL==MxBfL) {FlushBf();} 00381 return Bf[BfL++]=Ch; 00382 } 00383 00384 int TZipOut::PutBf(const void* LBf, const TSize& LBfL){ 00385 int LBfS=0; 00386 if (BfL+LBfL>MxBfL){ 00387 for (TSize LBfC=0; LBfC<LBfL; LBfC++){ 00388 LBfS+=PutCh(((char*)LBf)[LBfC]);} 00389 } else { 00390 for (TSize LBfC=0; LBfC<LBfL; LBfC++){ 00391 LBfS+=(Bf[BfL++]=((char*)LBf)[LBfC]);} 00392 } 00393 return LBfS; 00394 } 00395 00396 void TZipOut::Flush(){ 00397 FlushBf(); 00398 #ifdef GLib_WIN 00399 EAssertR(FlushFileBuffers(ZipStdinWr)!=0, "Can not flush file '"+GetSNm()+"'."); 00400 #else 00401 EAssertR(fflush(ZipStdinWr)==0, "Can not flush file '"+GetSNm()+"'."); 00402 #endif 00403 } 00404 00405 bool TZipOut::IsZipExt(const TStr& FNmExt) { 00406 if (FExtToCmdH.Empty()) FillFExtToCmdH(); 00407 return FExtToCmdH.IsKey(FNmExt); 00408 } 00409 00410 void TZipOut::FillFExtToCmdH() { 00411 // 7za compress: "a -y -bd -si{CompressedFNm}" 00412 #ifdef GLib_WIN 00413 const char* ZipCmd = "7z.exe a -y -bd -si"; 00414 #else 00415 const char* ZipCmd = "7za a -y -bd -si"; 00416 #endif 00417 if (FExtToCmdH.Empty()) { 00418 FExtToCmdH.AddDat(".gz", ZipCmd); 00419 FExtToCmdH.AddDat(".7z", ZipCmd); 00420 FExtToCmdH.AddDat(".rar", ZipCmd); 00421 FExtToCmdH.AddDat(".zip", ZipCmd); 00422 FExtToCmdH.AddDat(".cab", ZipCmd); 00423 FExtToCmdH.AddDat(".arj", ZipCmd); 00424 FExtToCmdH.AddDat(".bzip2", ZipCmd); 00425 FExtToCmdH.AddDat(".bz2", ZipCmd); 00426 } 00427 } 00428 00429 TStr TZipOut::GetCmd(const TStr& ZipFNm) { 00430 if (FExtToCmdH.Empty()) FillFExtToCmdH(); 00431 const TStr Ext = ZipFNm.GetFExt().GetLc(); 00432 EAssertR(FExtToCmdH.IsKey(Ext), TStr::Fmt("Unsupported file extension '%s'", Ext.CStr())); 00433 return FExtToCmdH.GetDat(Ext)+ZipFNm.GetFMid(); 00434 }