From 57957f19eb3d9462eb56a5af262576c016fe7dbc Mon Sep 17 00:00:00 2001 From: xiaochong0302 Date: Thu, 30 Jan 2020 16:53:48 +0800 Subject: [PATCH] =?UTF-8?q?=E6=98=A5=E8=8A=82=E6=9B=B4=E6=96=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- app/Console/Tasks/AvatarSyncTask.php | 91 +++++ app/Console/Tasks/ClassSpiderTask.php | 511 +++++++++++++++++++++++++ app/Console/Tasks/ReplySpiderTask.php | 156 ++++++++ app/Console/Tasks/ReviewSpiderTask.php | 117 ++++++ app/Console/Tasks/ThreadSpiderTask.php | 145 +++++++ app/Console/Tasks/TopicSpiderTask.php | 67 ++++ config/xs.course.ini.default | 3 + storage/tmp/.gitignore | 2 + 8 files changed, 1092 insertions(+) create mode 100644 app/Console/Tasks/AvatarSyncTask.php create mode 100644 app/Console/Tasks/ClassSpiderTask.php create mode 100644 app/Console/Tasks/ReplySpiderTask.php create mode 100644 app/Console/Tasks/ReviewSpiderTask.php create mode 100644 app/Console/Tasks/ThreadSpiderTask.php create mode 100644 app/Console/Tasks/TopicSpiderTask.php create mode 100644 storage/tmp/.gitignore diff --git a/app/Console/Tasks/AvatarSyncTask.php b/app/Console/Tasks/AvatarSyncTask.php new file mode 100644 index 00000000..de212a28 --- /dev/null +++ b/app/Console/Tasks/AvatarSyncTask.php @@ -0,0 +1,91 @@ +where('edu_role = 1') + ->limit($limit, $offset) + ->execute(); + + if ($users->count() > 0) { + $this->handleUsers($users); + } + } + } + + protected function handleUsers($users) + { + $storage = new Storage(); + + foreach ($users as $user) { + + $avatar = $user->avatar; + + if (!$avatar) { + continue; + } + + if (Text::startsWith($avatar, '/img/avatar')) { + continue; + } + + if (Text::startsWith($avatar, '//')) { + $avatar = 'http:' . $avatar; + } + + $url = str_replace(['-40-40', '-80-80', '-140-140', '-160-160'], '-200-200', $avatar); + + $fileName = parse_url($url, PHP_URL_PATH); + $filePath = tmp_path('avatar') . $fileName; + + $content = file_get_contents($url); + + if ($content === false) { + echo "get user {$user->id} avatar failed" . PHP_EOL; + continue; + } + + $put = file_put_contents($filePath, $content); + + if ($put === false) { + echo "put user {$user->id} cover failed" . PHP_EOL; + continue; + } + + $keyName = $this->getKeyName($filePath); + $remoteUrl = $storage->putFile($keyName, $filePath); + + if ($remoteUrl) { + $user->avatar = $keyName; + $user->deleted = 2; + $user->update(); + echo "upload avatar of user {$user->id} success" . PHP_EOL; + } else { + echo "upload avatar of user {$user->id} failed" . PHP_EOL; + } + } + } + + protected function getKeyName($filePath) + { + $ext = pathinfo($filePath, PATHINFO_EXTENSION); + return '/img/avatar/' . date('YmdHis') . rand(1000, 9999) . '.' . $ext; + } + +} diff --git a/app/Console/Tasks/ClassSpiderTask.php b/app/Console/Tasks/ClassSpiderTask.php new file mode 100644 index 00000000..5e3797fb --- /dev/null +++ b/app/Console/Tasks/ClassSpiderTask.php @@ -0,0 +1,511 @@ + ['div.shizhan-course-wrap > a', 'href'], + 'course_cover' => ['div.img-box > img.shizhan-course-img', 'src'], + 'course_title' => ['div.img-box > img.shizhan-course-img', 'alt'], + ]); + + $this->handleList($ql); + } + + protected function handleList($ql) + { + foreach (range(1, 6) as $page) { + + $url = "https://coding.imooc.com/?sort=0&unlearn=0&page={$page}"; + + echo "============== Page {$page} =================" . PHP_EOL; + + $data = $ql->get($url)->query()->getData(); + + if ($data->count() == 0) { + break; + } + + foreach ($data->all() as $item) { + + $courseData = [ + 'class_id' => $this->getCourseId($item['course_link']), + 'title' => $item['course_title'], + 'cover' => $item['course_cover'], + ]; + + //print_r($courseData); + + if ($courseData['class_id']) { + $course = CourseModel::findFirstByClassId($courseData['class_id']); + if (!$course) { + $course = new CourseModel(); + $course->create($courseData); + } + } + } + + } + + $ql->destruct(); + } + + public function courseAction() + { + $courses = CourseModel::query() + ->where('class_id > 114') + ->orderBy('class_id ASC') + ->execute(); + + foreach ($courses as $course) { + $this->handleCourse($course); + sleep(5); + } + } + + public function chapterAction() + { + $ql = QueryList::rules([ + 'chapter_title' => ['.chapter-bd > h5.name', 'text'], + 'chapter_summary' => ['.chapter-bd > p.desc', 'text'], + 'lesson_html' => ['.chapter-bd > ul', 'html'], + ]); + + $courses = CourseModel::query() + ->where('class_id > 114') + ->orderBy('class_id ASC') + ->execute(); + + foreach ($courses as $course) { + $this->handleChapter($ql, $course); + sleep(5); + } + } + + protected function handleChapter(QueryList $ql, $course) + { + echo " course id: {$course->id} , class_id :{$course->class_id} " . PHP_EOL; + + $url = "https://coding.imooc.com/class/chapter/{$course->class_id}.html"; + + $data = $ql->get($url)->query()->getData(); + + if ($data->count() == 0) { + return; + } + + $lesson_ql = QueryList::rules([ + 'lesson_title' => ['span.title_info', 'text'], + 'lesson_free' => ['span.watch-free', 'text'], + ]); + + foreach ($data->all() as $item) { + + $chapterData = [ + 'course_id' => $course->id, + 'title' => trim($item['chapter_title']), + 'summary' => trim($item['chapter_summary']), + ]; + + $chapter = new ChapterModel(); + $chapter->create($chapterData); + + $this->handleLesson($chapter, $lesson_ql, $item['lesson_html']); + } + + $ql->destruct(); + } + + protected function handleLesson($chapter, QueryList $lesson_ql, $html) + { + + $lessons = $lesson_ql->html($html)->query()->getData(); + + if ($lessons->count() == 0) { + return; + } + + foreach ($lessons->all() as $item) { + $data = [ + 'course_id' => $chapter->course_id, + 'parent_id' => $chapter->id, + 'title' => $item['lesson_title'], + 'free' => $item['lesson_free'] ? 1 : 0, + ]; + + $model = new ChapterModel(); + $model->create($data); + } + + $lesson_ql->destruct(); + } + + public function consultAction() + { + $courses = CourseModel::query() + ->where('class_id > 0') + ->orderBy('class_id ASC') + ->execute(); + + foreach ($courses as $course) { + $this->handleConsult($course); + sleep(5); + } + } + + protected function handleConsult($course) + { + + foreach (range(1, 20) as $page) { + + echo "course {$course->id}, page {$page}" . PHP_EOL; + + $url = "https://coding.imooc.com/class/ajaxconsultsearch?cid={$course->class_id}&page={$page}&pagesize=15"; + + $content = file_get_contents($url); + + $json = json_decode($content, true); + + $consults = $json['data']['data_adv']; + + if (empty($consults)) { + break; + } + + foreach ($consults as $item) { + $data = [ + 'question' => $item['content'], + 'answer' => $item['answer'], + 'like_count' => $item['praise'], + 'created_at' => strtotime($item['create_time']), + ]; + $consult = new ConsultModel(); + $consult->create($data); + } + + } + + } + + public function reviewAction() + { + $ql = QueryList::rules([ + 'review_content' => ['p.cmt-txt', 'text'], + 'review_rating' => ['div.stars > span', 'text'], + ]); + + $courses = CourseModel::query() + ->where('class_id > 0') + ->orderBy('class_id ASC') + ->execute(); + + foreach ($courses as $course) { + $this->handleReview($ql, $course); + sleep(5); + } + } + + protected function handleReview($ql, $course) + { + foreach (range(1, 10) as $page) { + + $url = "https://coding.imooc.com/class/evaluation/{$course->class_id}.html?page={$page}"; + + echo "============== Course {$course->id}, Page {$page} =================" . PHP_EOL; + + $data = $ql->get($url)->query()->getData(); + + if ($data->count() == 0) { + break; + } + + foreach ($data->all() as $item) { + + $reviewData = [ + 'course_id' => $course->id, + 'content' => $item['review_content'], + 'rating' => $this->getReviewRating($item['review_rating']), + ]; + + $review = new ReviewModel(); + $review->create($reviewData); + } + } + + $ql->destruct(); + } + + public function packageAction() + { + $ql = QueryList::rules([ + 'id' => ['a.js-buy-package', 'data-cid'], + 'title' => ['p.package-title', 'text'], + 'price' => ['p.package-price', 'text'], + 'other_html' => ['div.other-course-wrap', 'html'], + ]); + + $courses = CourseModel::query() + ->where('class_id > 0') + ->orderBy('class_id ASC') + ->execute(); + + foreach ($courses as $course) { + $this->handlePackage($ql, $course); + sleep(5); + } + } + + protected function handlePackage(QueryList $ql, $course) + { + echo " course id: {$course->id} , class_id :{$course->class_id} " . PHP_EOL; + + $url = "https://coding.imooc.com/class/package/{$course->class_id}.html"; + + $data = $ql->get($url)->query()->getData(); + + if ($data->count() == 0) { + return; + } + + $other_ql = QueryList::rules([ + 'href' => ['a.course-item', 'href'], + ]); + + foreach ($data->all() as $item) { + + $packageData = [ + 'id' => trim($item['id']), + 'title' => trim($item['title']), + 'market_price' => $this->getMarketPrice($item['price']), + ]; + + $package = PackageModel::findFirst($packageData['id']); + + if (!$package) { + $package = new PackageModel(); + $package->create($packageData); + } + + $cpRepo = new CoursePackageRepo(); + + $cp = $cpRepo->findCoursePackage($course->id, $package->id); + + if (!$cp) { + $cp = new CoursePackageModel(); + $cp->course_id = $course->id; + $cp->package_id = $package->id; + $cp->create(); + } + + $this->handleOtherPackageCourse($package, $other_ql, $item['other_html']); + } + + $ql->destruct(); + } + + protected function handleOtherPackageCourse($package, QueryList $other_ql, $html) + { + $courses = $other_ql->html($html)->query()->getData(); + + if ($courses->count() == 0) { + return; + } + + foreach ($courses->all() as $item) { + + $courseId = str_replace(['//coding.imooc.com/class/', '.html'], '', $item['href']); + + $cpRepo = new CoursePackageRepo(); + + $cp = $cpRepo->findCoursePackage($courseId, $package->id); + + if (!$cp) { + $cp = new CoursePackageModel(); + $cp->course_id = (int)$courseId; + $cp->package_id = $package->id; + $cp->create(); + } + } + + $other_ql->destruct(); + } + + protected function handleCourse($course) + { + echo " =============== class id {$course->class_id} ============" . PHP_EOL; + + $url = "https://coding.imooc.com/class/{$course->class_id}.html"; + + $ql = QueryList::getInstance()->get($url); + + $summary = $ql->find('div.info-desc')->text(); + $userLink = $ql->find('div.teacher > a')->attr('href'); + $marketPrice = $ql->find('div.ori-price')->text(); + $level = $ql->find('div.info-bar > span:eq(1)')->text(); + $duration = $ql->find('div.info-bar > span:eq(3)')->text(); + $userCount = $ql->find('div.info-bar > span:eq(5)')->text(); + $score = $ql->find('div.info-bar > span:eq(7)')->text(); + + $courseData = [ + 'summary' => trim($summary), + 'user_count' => intval($userCount), + 'market_price' => $this->getMarketPrice($marketPrice), + 'level' => $this->getLevel($level), + 'score' => $this->getScore($score), + 'attrs' => [ + 'duration' => $this->getCourseDuration($duration), + ], + ]; + + $course->update($courseData); + + $ql->destruct(); + + $userId = $this->getUserId($userLink); + + $user = UserModel::findFirst($userId); + + if ($user) { + + $user->edu_role = UserModel::EDU_ROLE_TEACHER; + $user->update(); + + $cuRepo = new \App\Repos\CourseUser(); + + $row = $cuRepo->findCourseTeacher($course->id, $user->id); + + if (!$row) { + $courseUser = new CourseUserModel(); + $courseUser->course_id = $course->id; + $courseUser->user_id = $user->id; + $courseUser->role_type = CourseUserModel::ROLE_TEACHER; + $courseUser->expire_time = strtotime('+15 years'); + $courseUser->create(); + } + } + + $this->handleTeacherInfo($userId); + + } + + protected function handleTeacherInfo($id) + { + $url = 'http://www.imooc.com/t/' . $id; + + $ql = QueryList::getInstance()->get($url); + + $data = []; + + $data['id'] = $id; + $data['avatar'] = $ql->find('img.tea-header')->attr('src'); + $data['name'] = $ql->find('p.tea-nickname')->text(); + $data['title'] = $ql->find('p.tea-professional')->text(); + $data['about'] = $ql->find('p.tea-desc')->text(); + + $user = UserModel::findFirst($id); + + if (!$user) { + $user = new UserModel(); + $user->create($data); + } + + $ql->destruct(); + } + + protected function getUserId($userLink) + { + $result = str_replace(['http://www.imooc.com/u/'], '', $userLink); + + return trim($result); + } + + protected function getCourseId($courseLink) + { + if (!strpos($courseLink, '.html')) { + return false; + } + + $result = str_replace(['/class/', '.html'], '', $courseLink); + + return trim($result); + } + + protected function getMarketPrice($price) + { + $price = str_replace('¥', '', $price); + return floatval(trim($price)); + } + + protected function getScore($score) + { + return floatval(trim($score) * 10); + } + + protected function getCourseDuration($duration) + { + $hours = 0; + $minutes = 0; + + if (preg_match('/(.*?)小时(.*?)分/s', $duration, $matches)) { + $hours = trim($matches[1]); + $minutes = trim($matches[2]); + } elseif (preg_match('/(.*?)小时/s', $duration, $matches)) { + $hours = trim($matches[1]); + } elseif (preg_match('/(.*?)分/s', $duration, $matches)) { + $minutes = trim($matches[1]); + } + + return 3600 * $hours + 60 * $minutes; + } + + protected function getChapterDuration($duration) + { + if (strpos($duration, ':') === false) { + return 0; + } + + list($minutes, $seconds) = explode(':', trim($duration)); + + return 60 * $minutes + $seconds; + } + + protected function getLevel($type) + { + $mapping = [ + '入门' => CourseModel::LEVEL_ENTRY, + '初级' => CourseModel::LEVEL_JUNIOR, + '中级' => CourseModel::LEVEL_MEDIUM, + '高级' => CourseModel::LEVEL_SENIOR, + ]; + + return $mapping[$type] ?? CourseModel::LEVEL_ENTRY; + } + + protected function getReviewRating($type) + { + $mapping = [ + '好评' => 10, + '中评' => 8, + '差评' => 6, + ]; + + return $mapping[$type] ?? 8; + } + +} diff --git a/app/Console/Tasks/ReplySpiderTask.php b/app/Console/Tasks/ReplySpiderTask.php new file mode 100644 index 00000000..6527fe61 --- /dev/null +++ b/app/Console/Tasks/ReplySpiderTask.php @@ -0,0 +1,156 @@ +columns(['id']) + ->where('id > 59429') + ->orderBy('id ASC') + ->execute(); + + $ql = $this->getRules(); + + foreach ($threads as $thread) { + $this->handleList($ql, $thread->id); + sleep(5); + } + } + + protected function getRules() + { + $ql = QueryList::getInstance()->rules([ + 'thread_content' => ['div.qa-disscus', 'html'], + 'user_link' => ['div.qa-comment-author > a', 'href'], + 'user_img' => ['div.qa-comment-author > a > img', 'src'], + 'user_name' => ['span.qa-comment-nick', 'text'], + 'reply_id' => ['div.qa-comment', 'data-cid'], + 'reply_content' => ['div.qa-comment-c > div.rich-text', 'html'], + 'reply_time' => ['span.qa-comment-time', 'text'], + ]); + + return $ql; + } + + protected function handleList($ql, $threadId) + { + + $thread = ThreadModel::findFirst($threadId); + + $first = true; + + foreach (range(1, 10) as $page) { + + $url = "https://www.imooc.com/qadetail/{$threadId}?page={$page}"; + + echo "============== Thread {$threadId}, Page {$page} =================" . PHP_EOL; + + $data = $ql->get($url)->query()->getData(); + + if ($data->count() == 0) { + break; + } + + foreach ($data->all() as $item) { + + if ($first) { + $threadContent = $this->getThreadContent($item['thread_content']); + if ($threadContent) { + $thread->update(['content' => $threadContent]); + } + $first = false; + } + + $userData = [ + 'id' => $this->getUserId($item['user_link']), + 'name' => $this->getUserName($item['user_name']), + 'avatar' => $item['user_img'], + ]; + + $user = UserModel::findFirst($userData['id']); + + if (!$user) { + $user = new UserModel(); + $user->create($userData); + } + + $replyData = [ + 'thread_id' => $threadId, + 'author_id' => $user->id, + 'id' => $item['reply_id'], + 'content' => $this->getReplyContent($item['reply_content']), + 'created_at' => $this->getReplyTime($item['reply_time']), + ]; + + $reply = ReplyModel::findFirst($replyData['id']); + + if (!$reply && $replyData['content']) { + $reply = new ReplyModel(); + $reply->create($replyData); + } + } + } + + $ql->destruct(); + } + + protected function getUserId($userLink) + { + $result = str_replace(['/u/', '/bbs'], '', $userLink); + + return trim($result); + } + + protected function getUserName($userName) + { + $result = mb_substr($userName, 0, 30); + + return $result; + } + + protected function getThreadContent($content) + { + $content = str_replace('  ', ' ', $content); + if (mb_strlen($content) > 3000) { + return false; + } + $result = mb_substr($content, 0, 3000); + return $result; + } + + protected function getReplyContent($content) + { + $content = str_replace('  ', ' ', $content); + if (mb_strlen($content) > 1500) { + return false; + } + $result = mb_substr($content, 0, 1500); + return $result; + } + + protected function getReplyTime($time) + { + $date = $this->filter->sanitize($time, ['trim', 'string']); + + if (strpos($date, '天')) { + $days = str_replace(['天前'], '', $date); + $days = intval($days); + $result = strtotime("-{$days} days"); + } else { + $result = strtotime(trim($date)); + } + + return $result; + } + +} diff --git a/app/Console/Tasks/ReviewSpiderTask.php b/app/Console/Tasks/ReviewSpiderTask.php new file mode 100644 index 00000000..8f7df3bc --- /dev/null +++ b/app/Console/Tasks/ReviewSpiderTask.php @@ -0,0 +1,117 @@ +columns(['id']) + ->where('id > 778') + ->orderBy('id ASC') + ->execute(); + + $ql = $this->getRules(); + + foreach ($courses as $course) { + $this->handleList($ql, $course->id); + sleep(5); + } + } + + protected function getRules() + { + $ql = QueryList::rules([ + 'user_link' => ['a.img-box', 'href'], + 'user_img' => ['a.img-box > img', 'src'], + 'user_name' => ['a.img-box > img', 'alt'], + 'review_content' => ['p.content', 'text'], + 'review_rating' => ['div.star-box > span', 'text'], + ]); + + return $ql; + } + + protected function handleList($ql, $courseId) + { + + foreach (range(1, 7) as $page) { + + $url = "https://www.imooc.com/course/coursescore/id/{$courseId}?page={$page}"; + + echo "============== Course {$courseId}, Page {$page} =================" . PHP_EOL; + + $data = $ql->get($url)->query()->getData(); + + if ($data->count() == 0) { + continue; + } + + foreach ($data->all() as $item) { + + $userData = [ + 'id' => $this->getUserId($item['user_link']), + 'name' => $item['user_name'], + 'avatar' => $item['user_img'], + ]; + + $user = UserModel::findFirst($userData['id']); + + if (!$user) { + $user = new UserModel(); + $user->create($userData); + } + + $reviewData = [ + 'user_id' => $user->id, + 'course_id' => $courseId, + 'content' => $this->getReviewContent($item['review_content']), + 'rating' => $this->getReviewRating($item['review_rating']), + ]; + + $reviewRepo = new ReviewRepo(); + + $reviewExist = $reviewRepo->findReview($courseId, $user->id); + + if (!$reviewExist) { + $review = new ReviewModel(); + $review->create($reviewData); + } + } + } + + $ql->destruct(); + } + + protected function getUserId($userLink) + { + $result = str_replace(['/u/', '/courses'], '', $userLink); + + return trim($result); + } + + protected function getReviewRating($rating) + { + $result = str_replace(['分'], '', $rating); + + return intval($result); + } + + protected function getReviewContent($content) + { + $result = $this->filter->sanitize($content, ['trim', 'string']); + + return $result; + } +} diff --git a/app/Console/Tasks/ThreadSpiderTask.php b/app/Console/Tasks/ThreadSpiderTask.php new file mode 100644 index 00000000..14a1f481 --- /dev/null +++ b/app/Console/Tasks/ThreadSpiderTask.php @@ -0,0 +1,145 @@ +columns(['id']) + ->where('id > 494') + ->orderBy('id ASC') + ->execute(); + + $ql = $this->getRules(); + + foreach ($courses as $course) { + $this->handleList($ql, $course->id); + sleep(5); + } + } + + protected function getRules() + { + $ql = QueryList::rules([ + 'user_link' => ['a.media', 'href'], + 'user_img' => ['a.media > img', 'src'], + 'user_name' => ['a.media', 'title'], + //'chapter_link' => ['div.l-box > a:eq(1)', 'href'], + 'thread_link' => ['a.qa-tit', 'href'], + 'thread_title' => ['a.qa-tit', 'text'], + 'thread_time' => ['em.r', 'text'], + ]); + + return $ql; + } + + protected function handleList($ql, $courseId) + { + + foreach (range(1, 10) as $page) { + + $url = "https://www.imooc.com/course/qa/id/{$courseId}/t/2?page={$page}"; + + echo "============== Course {$courseId}, Page {$page} =================" . PHP_EOL; + + $data = $ql->get($url)->query()->getData(); + + if ($data->count() == 0) { + break; + } + + foreach ($data->all() as $item) { + + $userData = [ + 'id' => $this->getUserId($item['user_link']), + 'name' => $this->getUserName($item['user_name']), + 'avatar' => $item['user_img'], + ]; + + $user = UserModel::findFirst($userData['id']); + + if (!$user) { + $user = new UserModel(); + $user->create($userData); + } + + $threadData = [ + 'course_id' => $courseId, + 'author_id' => $user->id, + 'id' => $this->getThreadId($item['thread_link']), + 'title' => $this->getThreadTitle($item['thread_title']), + 'created_at' => $this->getThreadTime($item['thread_time']), + ]; + + $thread = ThreadModel::findFirst($threadData['id']); + + if (!$thread) { + $thread = new ThreadModel(); + $thread->create($threadData); + } + } + } + + $ql->destruct(); + } + + protected function getUserId($userLink) + { + $result = str_replace(['/u/', '/courses'], '', $userLink); + + return trim($result); + } + + protected function getUserName($userName) + { + $result = mb_substr($userName, 0, 30); + + return $result; + } + + protected function getChapterId($chapterLink) + { + $result = str_replace(['/video/'], '', $chapterLink); + + return trim($result); + } + + protected function getThreadId($threadLink) + { + $result = str_replace(['/qadetail/'], '', $threadLink); + + return trim($result); + } + + protected function getThreadTitle($title) + { + $title = $this->filter->sanitize($title, ['trim']); + $result = mb_substr($title, 0, 120); + return $result; + } + + protected function getThreadTime($time) + { + $date = $this->filter->sanitize($time, ['trim', 'string']); + + if (strpos($date, '天')) { + $days = str_replace(['天前'], '', $date); + $days = intval($days); + $result = strtotime("-{$days} days"); + } else { + $result = strtotime(trim($date)); + } + + return $result; + } + +} diff --git a/app/Console/Tasks/TopicSpiderTask.php b/app/Console/Tasks/TopicSpiderTask.php new file mode 100644 index 00000000..845911e7 --- /dev/null +++ b/app/Console/Tasks/TopicSpiderTask.php @@ -0,0 +1,67 @@ +columns(['id']) + ->where('id > 810') + ->orderBy('id ASC') + ->execute(); + + foreach ($courses as $course) { + $this->handleList($course->id); + sleep(5); + } + } + + protected function handleList($courseId) + { + $url = "https://www.imooc.com/course/ajaxskillcourse?cid={$courseId}"; + + $content = file_get_contents($url); + + $result = json_decode($content, true); + + $topics = $result['data']; + + echo "============== Course {$courseId} =================" . PHP_EOL; + + if (empty($topics)) { + return; + } + + foreach ($topics as $item) { + + $topicData = [ + 'id' => $item['subject_id'], + 'title' => $item['title'], + 'alias' => $this->getAlias($item['url']), + ]; + + $topic = TopicModel::findFirst($topicData['id']); + + if (!$topic) { + $topic = new TopicModel(); + $topic->create($topicData); + } + } + + } + + protected function getAlias($url) + { + $result = str_replace('//www.imooc.com/topic/', '', $url); + return trim($result); + } + + +} diff --git a/config/xs.course.ini.default b/config/xs.course.ini.default index 6574b7e2..1197b8b2 100644 --- a/config/xs.course.ini.default +++ b/config/xs.course.ini.default @@ -42,6 +42,9 @@ type = string index = self tokenizer = full +[attrs] +type = string + [user_count] type = string diff --git a/storage/tmp/.gitignore b/storage/tmp/.gitignore new file mode 100644 index 00000000..c96a04f0 --- /dev/null +++ b/storage/tmp/.gitignore @@ -0,0 +1,2 @@ +* +!.gitignore \ No newline at end of file