123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166 |
- "use strict";
- let exportObj = {
- url : '',
- protocol : '',
- origin : '',
- url_relative : '',
- $ : null,
- /**
- * 展开td的colspan
- * @author benzhan
- */
- fixColspan($table) {
- let $ = this.$;
- let $trs = $table.find('tr');
- for (let i = 0; i < $trs.length; i++) {
- let $tr = $($trs[i]);
- let $tds = $tr.find('td[colspan]');
- for (let j = 0; j < $tds.length; j++) {
- let $td = $($tds[j]);
- let span = $td.attr('colspan');
- for (let k = 1; k < span; k++) {
- $td.after('<td></td>');
- }
- $td.removeAttr('colspan');
- }
- }
- },
- formatDate: function(date) {
- let y, m, d = '01';
- if (typeof date === 'string') {
- let parts = date.match(/[\d]{4}-[\d]{1,2}-[\d]{1,2}/);
- if (parts && parts.length) {
- return parts[0];
- }
- let parts1 = date.match(/([\d]{4})[\s]*年/);
- let parts2 = date.match(/([\d]{1,2})[\s]*月/);
- let parts3 = date.match(/([\d]{1,2})[\s]*日/);
- if ((parts1 && parts1.length || parts3 && parts3.length) && parts2.length ) {
- y = parseInt(parts1[1]);
- m = parseInt(parts2[1]);
- d = parseInt(parts3 && parts3[1]) || 1;
- } else {
- return '';
- }
- } else {
- if (typeof date === 'number') {
- date = new Date(date);
- }
- y = date.getFullYear();
- m = date.getMonth() + 1;
- d = date.getDate();
- }
- m = m < 10 ? '0' + m : m;
- d = d < 10 ? ('0' + d) : d;
- return y + '-' + m + '-' + d;
- },
- formatDateTime: function(date) {
- let dateStr = this.formatDate(date);
- let h, minute, second = '00';
- if (typeof date === 'string') {
- let parts = date.match(/[\d]{1,2}:[\d]{1,2}(:[\d]{1,2})?/);
- if (parts && parts.length) {
- return dateStr + ' ' + parts[0];
- }
- let parts1 = date.match(/([\d]{1,2})[\s]*时/);
- let parts2 = date.match(/([\d]{1,2})[\s]*分/);
- let parts3 = date.match(/([\d]{1,2})[\s]*时/);
- if (parts1 && parts1.length && parts2 && parts2.length) {
- h = parseInt(parts1[1]);
- minute = parseInt(parts2[1]);
- second = parseInt(parts3 && parts3[1]) || 0;
- } else {
- return dateStr;
- }
- } else {
- if (typeof date === 'number') {
- date = new Date(date);
- }
- h = date.getHours();
- second = date.getSeconds();
- minute = date.getMinutes();
- }
- h = h < 10 ? ('0' + h) : h;
- minute = minute < 10 ? ('0' + minute) : minute;
- second = second < 10 ? ('0' + second) : second;
- return dateStr + ' ' + h + ':' + minute + ':' + second;
- },
- initUrl(url) {
- this.url = url;
- let parts = url.match(/(http[s]?:)\/\/[^\/]+[\/]?/);
- this.origin = parts[0];
- this.protocol = parts[1];
- this.url_relative = url.substr(0, url.lastIndexOf('/') + 1);
- },
- initJquery($) {
- this.$ = $;
- },
- formatUrl(href) {
- href = href.trim();
- let origin = this.origin;
- if (/http[s]?:\/\//.test(href)) {
- return href;
- } else if (href.substr(0, 2) === '//') {
- return this.protocol + href;
- } else if (href[0] === '/') {
- if (origin[origin.length - 1] === '/') {
- return origin + href.substr(1);
- } else {
- return origin + href;
- }
- } else {
- return this.url_relative + href;
- }
- },
- formatRichText(content) {
- let $ = this.$;
- let $content = $('<div>' + content.trim() + '</div>');
- $content.find('script').remove();
- $content.find('img').each(function() {
- let formatSrc = exportObj.formatUrl($(this).attr('src'));
- $(this).attr('src', formatSrc);
- });
- $content.find('a').each(function() {
- let href = $(this).attr('href');
- if (href) {
- let formatHref = exportObj.formatUrl(href);
- $(this).attr('href', formatHref);
- }
- $(this).attr('rel', 'nofollow');
- });
- return $content.html();
- },
- md5(text) {
- let crypto = require('crypto');
- return crypto.createHash('md5').update(text).digest('hex');
- },
- setNextTime(task_id, interval) {
- const php = require('phpjs');
- const TableHelper = require('../framework/lib/TableHelper.js');
- // next_crawl_time = 0;
- const where = {task_id};
- // 修改最后更新时间
- let last_crawl_time = php.date('Y-m-d H:i:s');
- let next_crawl_time = php.time() + interval;
- const objTask = new TableHelper('task', 'crawl');
- objTask.updateObject({last_crawl_time, next_crawl_time}, where);
- }
- }
- // 兼容老版本
- exportObj.formaRichText = exportObj.formatRichText;
- module.exports = exportObj;
|