采集网站 相关文件--ArticleSpider.php [ 1.0 版本 ]
YII2网站采集
(1)QueueController.php
(2)ArticleController.php
(3)ArticleJob.php
(4)ArticleSpider.php
(5)YiichinaSpider.php
<?php
namespace console\models;
use common\models\Article;
use common\models\ArticleTag;
use common\models\Gather;
use common\models\Tag;
/**
 * Base class for site-specific article spiders.
 *
 * Provides the shared plumbing: checking whether a URL was already gathered,
 * pushing crawl jobs onto the Resque queue, persisting a scraped article
 * (with an optional tag) and writing a gather-log row per processed URL.
 */
class ArticleSpider
{
    /** @var array Article categories of the target site. */
    protected $category = [];

    /** @var string Domain of the target site. */
    protected $baseUrl = '';

    /** @var string Name of the target site. */
    protected $name = '';

    /**
     * Build the lookup key stored in Gather::$url.
     *
     * Kept in one place so that the value written by addLog() and the value
     * searched by isGathered() can never drift apart.
     *
     * @param string $url
     * @return string md5 hash of the whitespace-trimmed URL
     */
    protected static function hashUrl($url)
    {
        return md5(trim($url));
    }

    /**
     * Tell whether the given URL has already been gathered successfully.
     *
     * @param string $url
     * @return bool true when a Gather row with a truthy `res` exists for the URL
     */
    protected function isGathered($url)
    {
        $gather = Gather::find()
            ->where(['url' => self::hashUrl($url), 'res' => true])
            ->one();

        return $gather ? true : false;
    }

    /**
     * Push a URL onto the `article_spider` Resque queue.
     *
     * The job is handled by console\models\ArticleJob, which receives the
     * arguments below as its payload.
     *
     * @param mixed  $category
     * @param string $url
     * @param string $className
     * @param string $publishTime
     */
    public function enqueue($category, $url, $className, $publishTime = '')
    {
        \Resque::enqueue('article_spider', 'console\models\ArticleJob', [
            'category'    => $category,
            'url'         => $url,
            'className'   => $className,
            'publishTime' => $publishTime,
        ]);
    }

    /**
     * Store a scraped article and, optionally, link it to a tag.
     *
     * The tag (the category that was searched) is created on demand. Tagging
     * is best-effort: a failure there is echoed to the console and does not
     * change the return value.
     *
     * @param string $title
     * @param string $content
     * @param mixed  $publish_at
     * @param string $tag optional tag name; skipped when empty
     * @return bool whether the article row was saved
     */
    public static function insert($title, $content, $publish_at, $tag = '')
    {
        $article = new Article();
        $article->title = $title;
        $article->content = $content;
        $article->author = 'yang';
        $article->status = Article::STATUS_GATHER;
        $article->publish_at = $publish_at;

        if (!$article->save(false)) {
            // Without a saved article there is no id to attach a tag to;
            // continuing would write an ArticleTag row with a null article_id.
            return false;
        }

        if ($tag) {
            try {
                $tagModel = Tag::find()->where(['name' => $tag])->one();
                if (!$tagModel) {
                    $tagModel = new Tag();
                    $tagModel->name = $tag;
                    $tagModel->article_count = 0;
                    if (!$tagModel->save(false)) {
                        // Routed through the catch below so the link row is
                        // never written with a null tag_id.
                        throw new \RuntimeException('Failed to save tag: ' . $tag);
                    }
                }

                $articleTag = new ArticleTag();
                $articleTag->article_id = $article->id;
                $articleTag->tag_id = $tagModel->id;
                $articleTag->save(false);
            } catch (\Exception $e) {
                echo $e->getMessage() . PHP_EOL;
            }
        }

        return true;
    }

    /**
     * Write a gather-log row for a processed URL.
     *
     * The hashed URL uses the same key as isGathered(); the raw URL is kept
     * unchanged in `url_org`.
     *
     * @param string $url
     * @param mixed  $category
     * @param mixed  $res    outcome flag of the gather attempt
     * @param mixed  $result outcome detail of the gather attempt
     */
    public function addLog($url, $category, $res, $result)
    {
        $gather = new Gather();
        $gather->name = $this->name;
        $gather->category = $category;
        $gather->url = self::hashUrl($url);
        $gather->url_org = $url;
        $gather->res = $res;
        $gather->result = $result;

        if (!$gather->save()) {
            // save() runs validation here; a silent failure would make the
            // URL look un-gathered on every later run.
            echo 'Failed to save gather log for ' . $url . PHP_EOL;
        }
    }
}
?>
specialnot
注册时间:2015-08-06
最后登录:2019-08-16
在线时长:27小时54分
最后登录:2019-08-16
在线时长:27小时54分
- 粉丝43
- 金钱1175
- 威望200
- 积分3445
共 1 条评论
表结构来一个啊