博客
关于我
强烈建议你试试无所不能的chatGPT,快点击我
获取电驴首页推荐信息和指定栏目信息
阅读量:4625 次
发布时间:2019-06-09

本文共 6957 字,大约阅读时间需要 23 分钟。

标 题: 获取电驴首页推荐信息和指定栏目信息

作 者: itdef
链 接: https://www.cnblogs.com/itdef/p/4081963.html

欢迎转帖 请保持文本完整并注明出处

 

/*******************************************************************************
 *  @file
 *  @author      def< qq group: 324164944 >
 *  @blog        http://www.cnblogs.com/itdef/
 *  @brief
 ******************************************************************************/

#include "stdafx.h"

#include <afxinet.h>

#include <fstream>
#include <iostream>
#include <set>
#include <sstream>
#include <string>
#include <vector>
using namespace std; #ifdef _DEBUG#define new DEBUG_NEW#endif int GetHttpFileData(CString strUrl,char* DownloadHtmFileName);int ParseHomePageDownloadFile(char* szfileName);int UTF8Str2GBK(const string& strUTF8,string& strGBK);void GetHomePageRecommend(char* szName,const string& strGbk); // 唯一的应用程序对象 CWinApp theApp; using namespace std; int ParseUpdateFile(char* szfileName){ int iRet = -1; if(NULL == szfileName) return iRet; fstream fs(szfileName); stringstream ss ; // 创建字符串流对象 ss << fs.rdbuf(); // 把文件流中的字符输入到字符串流中 fs.close(); string str = ss.str(); // 获取流中的字符串 string strGbk; int i = UTF8Str2GBK(str,strGbk); if(strGbk.size() == 0 || i != 0) { cerr << "transfer utf8 to gbk error" << endl; return iRet; } basic_string
::size_type keyWordStart = strGbk.find("
"); basic_string <char>::size_type keyWordEnd = strGbk.find("",keyWordStart+1); if( (keyWordStart != string::npos) && (keyWordEnd != string::npos) && (keyWordEnd > keyWordStart) ) { string strKeyWord = strGbk.substr(keyWordStart+7,keyWordEnd - keyWordStart -7); cout << strKeyWord << endl; } keyWordStart = strGbk.find("
"); keyWordEnd = strGbk.find("
",keyWordStart+1); if( (keyWordStart != string::npos) && (keyWordEnd != string::npos) && (keyWordEnd > keyWordStart) ) { string strKeyWord = strGbk.substr(keyWordStart+22,keyWordEnd - keyWordStart -22); cout << strKeyWord << endl; } iRet = 0; return iRet;} void ShowUpdateInfo(char* szHtmAddress){ if ( 0 != GetHttpFileData(szHtmAddress,"HtmDownloadFile")) { cerr << "GetHttpFileData error once" << endl; } if( 0 != ParseUpdateFile("HtmDownloadFile")) { cerr << "ParseUpdateFile error once" << endl; } } void ShowHomePageElement(char* szHomePageAddress){ if ( 0 != GetHttpFileData(szHomePageAddress,"HtmDownloadFile")) { cerr << "GetHttpFileData error once" << endl; } if( 0 != ParseHomePageDownloadFile("HtmDownloadFile")) { cerr << "GetHttpFileData error once" << endl; }} int _tmain(int argc, TCHAR* argv[], TCHAR* envp[]){ int nRetCode = 0; // 初始化 MFC 并在失败时显示错误 if (!AfxWinInit(::GetModuleHandle(NULL), NULL, ::GetCommandLine(), 0)) { // TODO: 更改错误代码以符合您的需要 _tprintf(_T("错误: MFC 初始化失败\n")); nRetCode = 1; } else { // TODO: 在此处为应用程序的行为编写代码。 ShowHomePageElement("http://www.verycd.com/"); cout << "****************************************************" << endl; ShowUpdateInfo("http://www.verycd.com/entries/790244/"); cout << "****************************************************" << endl; ShowUpdateInfo("http://www.verycd.com/entries/519062/"); cout << "****************************************************" << endl; ShowUpdateInfo("http://www.verycd.com/entries/780306/"); cout << "****************************************************" << endl; ShowUpdateInfo("http://www.verycd.com/entries/522227/"); cout << "****************************************************" << endl; ShowUpdateInfo("http://www.verycd.com/entries/507338/"); cout << "****************************************************" << endl; ShowUpdateInfo("http://www.verycd.com/entries/515005/"); cout << "****************************************************" << endl; ShowUpdateInfo("http://www.verycd.com/entries/794197/"); cout 
<< "****************************************************" << endl; ShowUpdateInfo("http://www.verycd.com/entries/511135/"); cout << "****************************************************" << endl; } system("pause"); return nRetCode;} int UTF8Str2GBK(const string& strUTF8,string& strGBK){ int i = MultiByteToWideChar(CP_UTF8, 0, strUTF8.c_str(), -1, NULL, 0); WCHAR *wsz = NULL; TCHAR *tsz = NULL; int iRet = -1; wsz = new WCHAR[i+1]; if( NULL == wsz) { goto UTF8Str2GBK_EXIT; } MultiByteToWideChar(CP_UTF8, 0, strUTF8.c_str(), -1, wsz, i); i = WideCharToMultiByte(CP_ACP, 0, wsz, -1, NULL, 0, NULL, NULL); tsz = new TCHAR[i+1]; if( NULL == tsz) { goto UTF8Str2GBK_EXIT; } WideCharToMultiByte(CP_ACP, 0, wsz, -1, tsz, i, NULL, NULL); strGBK = string(tsz); iRet = 0;UTF8Str2GBK_EXIT: delete []wsz; delete []tsz; return iRet;} int ParseHomePageDownloadFile(char* szfileName){ int iRet = -1; if(NULL == szfileName) return iRet; fstream fs(szfileName); stringstream ss ; // 创建字符串流对象 ss << fs.rdbuf(); // 把文件流中的字符输入到字符串流中 fs.close(); string str = ss.str(); // 获取流中的字符串 string strGbk; int i = UTF8Str2GBK(str,strGbk); if(strGbk.size() == 0 || i != 0) { cerr << "transfer utf8 to gbk error" << endl; return iRet; } cout << "首页大推" << endl; GetHomePageRecommend("VeryCD.TrackEvent('base','首页大推',",strGbk); cout << "首页小推" << endl; GetHomePageRecommend("VeryCD.TrackEvent('base','首页小推',",strGbk); iRet = 0; return iRet;} void GetHomePageRecommend(char* szName,const string& strGbk){ set
setKeyWord; //cout << strGbk; basic_string
::size_type keyWordStart = strGbk.find(szName); basic_string
::size_type keyWordEnd = strGbk.find("')",keyWordStart+1); if( (keyWordStart != string::npos) && (keyWordEnd != string::npos) && (keyWordEnd > keyWordStart + 37) ) { string strKeyWord = strGbk.substr(keyWordStart+37,keyWordEnd - keyWordStart - 37); setKeyWord.insert(strKeyWord); //cout << "电驴首页小推 " << strKeyWord << endl; } while( keyWordStart != string::npos && keyWordEnd != string::npos) { keyWordStart = strGbk.find(szName,keyWordEnd+1); keyWordEnd = strGbk.find("')",keyWordStart+1); if( (keyWordStart != string::npos) && (keyWordEnd != string::npos) && (keyWordEnd > keyWordStart + 37) ) { string strKeyWord = strGbk.substr(keyWordStart+37,keyWordEnd - keyWordStart - 37); setKeyWord.insert(strKeyWord); //cout << "电驴首页小推 " << strKeyWord << endl; } } set
::iterator pos; for(pos = setKeyWord.begin();pos != setKeyWord.end();++ pos) { cout << "电驴首页推荐 " << *pos << endl; } } int GetHttpFileData(CString strUrl,char* szDownloadHtmFileName){ CInternetSession Session("Internet Explorer", 0); CHttpFile *pHttpFile = NULL; CString strData; CString strClip; int iRet = -1; if(szDownloadHtmFileName == NULL) { cerr << "DownloadHtmFileName is NULL" << endl; Session.Close(); return iRet; } ofstream of(szDownloadHtmFileName); if (of.bad()) { cerr << "of create file error" << endl; Session.Close(); return iRet; } try { pHttpFile = (CHttpFile*)Session.OpenURL(strUrl); while ( pHttpFile->ReadString(strClip) ) { of << strClip; } }catch(CInternetException* pEx) { TCHAR pszError[64]; pEx->GetErrorMessage(pszError, 64); cerr << __FUNCTION__ << pszError << endl; goto GetHttpFileData_EXIT; } iRet = 0; GetHttpFileData_EXIT: Session.Close(); of.close(); return iRet;}

  关于字符集转换的 文章 

转载于:https://www.cnblogs.com/itdef/p/4081963.html

你可能感兴趣的文章
MFC(四)文本编程
查看>>
Mmc编程
查看>>
MySQL之路 ——2、步履维艰的建表
查看>>
【原】 COCOS2D—LUA 获取剪贴板内容
查看>>
Spring Cloud(四):服务容错保护 Hystrix【Finchley 版】
查看>>
寻找最大数(三)
查看>>
0924 java学习记录
查看>>
flume source,sinks类型官方翻译
查看>>
canal架构原理
查看>>
HTTP隧道工具HTTPTunnel
查看>>
字符编码常识及问题解析
查看>>
hbase的缺陷
查看>>
Makefile文本处理函数
查看>>
BZOJ4650: [Noi2016]优秀的拆分
查看>>
BZOJ1058: [ZJOI2007]报表统计
查看>>
BZOJ3894: 文理分科
查看>>
动态生成元素动作绑定,jquery 1.9如何实现
查看>>
POJ2559 Largest Rectangle in a Histogram(单调栈)
查看>>
paip.自适应网页设计 跟 响应式 设计的区别跟原理and实践总结
查看>>
时间慢走些,慢走些
查看>>