火车头采集器双标题插件【完整php源码】

2023-09-15 16:09:10 · 628人浏览 · 八月长安

摘要

最近比较流行的一种伪原创方式，火车头采集器双标题插件，功能主要如下： 1、原文标题伪原创，实现标题相关双标题； 2、文章内容伪原创，提高文章的原创度； 3、文章内容摘要提取。废话就不多说，插件是用php开发的，需要的小伙伴可以试试，直接给大家献上源码。

权重站镇楼

最近比较流行的一种伪原创方式，火车头采集器双标题插件，功能主要如下：

1、原文标题伪原创，实现标题相关双标题；

2、文章内容伪原创，提高文章的原创度；

3、文章内容摘要提取。

废话就不多说，插件是用php开发的，需要的小伙伴可以试试，直接给大家献上源码：

$LabelArray['标题'], 'wenzhang'=>$LabelArray[$content_tag_name]));
    // NOTE(review): everything before this point (the opening PHP tag, the start
    // of the call completed on the line above, and the enclosing switch/try) is
    // missing from the visible source. The visible remainder of the script is
    // kept token-for-token; only its line breaks were restored.
    $title_wyc = trim($article_new[0]);
    $content_wyc = trim($article_new[1]);
    $content_wyc = fix_newline($content_wyc);
    $content_wyc = str_replace('标签：', '标签:', $content_wyc);
    $LabelArray[$content_tag_name] = $headdd. $content_wyc. $taill;
    $LabelArray['摘要'] = strip_tags($summary);
    $title_wyc = str_replace(array('。','，','％'), array(' ',' ',' '), $title_wyc);
    $LabelArray['标题'] = $title_wyc;
    }
    catch (Exception $e) {
        // Surface the failure by appending the message to both output labels.
        $LabelArray['标题'] .= $e->getMessage();
        $LabelArray[$content_tag_name] .= $e->getMessage();
    }
        break;
    default:
        //$LabelArray[$content_tag_name]=curl_request($url, array('wenzhang'=>$LabelArray[$content_tag_name] ));
}

// Hand the (possibly modified) label array back to the caller in serialized form.
echo serialize($LabelArray);

/**
 * Joins a title and a body into one text, separated by the marker produced by
 * compose_separator().
 *
 * @param string $title
 * @param string $content
 * @return string
 */
function compose_article($title, $content)
{
    $separator = compose_separator();

    return $title.$separator.$content;
}

/**
 * Builds the title/body separator: "(" . TITLE_SEPAR2 . ")" on a line of its own.
 *
 * TITLE_SEPAR2 is a constant defined outside the visible source.
 *
 * @return string
 */
function compose_separator()
{
    return PHP_EOL.'('.TITLE_SEPAR2.')'.PHP_EOL;
}

/**
 * Hook for repairing a damaged separator in a returned article.
 * Currently returns its input unchanged.
 *
 * @param string $article
 * @return string
 */
function fix_separator($article)
{
    return $article;
}

/**
 * Posts $str (field "wenzhang") to the endpoint in the global $url and splits
 * the response on the separator marker (without its surrounding newlines).
 *
 * Known title prefixes ("标题：", "标题:", "目：", "目:" and a leading "题") are
 * stripped from the first part.
 *
 * @param string $str Text to send.
 * @return array Parts of the response; index 0 is the title part, index 1 (when
 *               the separator was present in the response) the body part.
 */
function get_wyc_article($str)
{
    global $url;

    $separator = str_replace(PHP_EOL, '', compose_separator());

    $wyc = curl_request($url, array('wenzhang'=>$str));
    $wyc = fix_separator($wyc);
    $wyc = explode($separator, $wyc);

    if (isset($wyc[0])) {
        $wyc[0] = str_replace('标题：', '', $wyc[0]);
        $wyc[0] = str_replace('标题:', '', $wyc[0]);
        $wyc[0] = str_replace('目：', '', $wyc[0]);
        $wyc[0] = str_replace('目:', '', $wyc[0]);
        // Prefix a sentinel so that only a "题" at the very start is removed,
        // then drop the sentinel itself.
        $wyc[0] = 'xx`xx'.$wyc[0];
        $wyc[0] = str_replace('xx`xx题', '', $wyc[0]);
        $wyc[0] = str_replace('xx`xx', '', $wyc[0]);
    }

    return $wyc;
}

/**
 * Sends the title three times (separated by blank lines) through
 * get_wyc_article() and returns the first line of the result.
 *
 * get_wyc_article() returns an array, so its first element is used here;
 * passing the whole array on to fix_newline()/explode() is a type error.
 *
 * @param string $str Original title.
 * @return string First line of the returned text.
 */
function get_wyc_title($str)
{
    $gap = PHP_EOL.PHP_EOL.PHP_EOL;
    $parts = get_wyc_article($str.$gap.$str.$gap.$str);

    $title = isset($parts[0]) ? $parts[0] : '';
    $title = fix_newline(trim($title));
    $lines = explode(PHP_EOL, $title);

    return $lines[0];
}

/**
 * Posts title and text to the keyword endpoint and returns the raw response.
 *
 * NOTE(review): the "appid" query parameter is empty in the visible source.
 *
 * @param string $title
 * @param string $contents
 * @return mixed Whatever curl_request() returns for this request.
 */
function get_keywords($title, $contents)
{
    $url_kw = 'http://api-2.78tp.com/nlp/kws.php?appid=';
    $kws = curl_request($url_kw, array(
        'title'=>$title,
        'len'=>100,
        'text'=>$contents));

    return $kws;
}

/**
 * Removes every alt="..." attribute from the given markup.
 *
 * The value is matched up to the next double quote only; a greedy ".*" would
 * swallow everything up to the last quote on the line, including neighbouring
 * attributes and tags.
 *
 * @param string $contents
 * @return string
 */
function remove_alt($contents)
{
    $contents = preg_replace('/alt="[^"]*"/', '', $contents);

    return $contents;
}

/**
 * Strips common Chinese and ASCII punctuation marks from a string.
 *
 * @param string $contents
 * @return string
 */
function fix_title($contents)
{
    $punctuation_symbol = array('。', '？', '，', '：', '；', '、', '！',
    '.',  '?',  ',',  ':',  ';', '!');
    $contents = str_replace($punctuation_symbol, '', $contents);

    return $contents;
}

/**
 * Replaces line-break markers in $contents with PHP_EOL.
 *
 * NOTE(review): the six search strings below are raw line breaks in the
 * visible source. They look like HTML break-tag variants that were lost when
 * the page was extracted - restore the intended literals from the original
 * plugin before relying on this function.
 *
 * @param string $contents
 * @return string
 */
function br2newline($contents)
{
    $contents = str_replace('
', PHP_EOL, $contents);
    // The constant name is case-sensitive: it must be PHP_EOL, not php_EOL.
    $contents = str_replace('
', PHP_EOL, $contents);
    $contents = str_replace('
', PHP_EOL, $contents);
    $contents = str_replace('
', PHP_EOL, $contents);
    $contents = str_replace('
', PHP_EOL, $contents);
    $contents = str_replace('
', PHP_EOL, $contents);

    return $contents;
}

/**
 * Counterpart of br2newline().
 *
 * NOTE(review): both string literals below are raw line breaks in the visible
 * source and were presumably HTML break-tag variants lost in extraction -
 * confirm against the original plugin. The parameter keeps its original
 * spelling so existing callers are unaffected.
 *
 * @param string $contnets
 * @return string
 */
function newline2br($contnets)
{
    $contnets = str_replace(PHP_EOL, "
", $contnets);
    $contnets = str_replace('
', '', $contnets);

    return $contnets;
}

/**
 * Collapses runs of line breaks; currently a thin wrapper around fix_newline().
 *
 * @param string $contents
 * @return string
 */
function delete_newline($contents)
{
    $contents = fix_newline($contents);

    return $contents;
}

/**
 * Normalizes all line endings (CRLF, CR, LF) to PHP_EOL without collapsing
 * blank lines.
 *
 * @param string $contents
 * @return string
 */
function reset_newline_win($contents)
{
    $contents = str_replace("\r\n", "\n", $contents);
    $contents = str_replace("\r", "\n", $contents);
    $contents = str_replace("\n", PHP_EOL, $contents);

    return $contents;
}

/**
 * Converts every CR to LF, collapses consecutive line breaks into one and
 * finally converts LF to PHP_EOL.
 *
 * @param string $data
 * @return string
 */
function fix_newline($data)
{
    $data = str_replace("\r", "\n", $data);
    while (strpos($data, "\n\n") !== false) {
        $data = str_replace("\n\n", "\n", $data);
    }
    $data = str_replace("\n", PHP_EOL, $data);

    return $data;
}

/**
 * Strips HTML attributes from $contents using cleanHtml.
 *
 * "src" is kept on every element; <img> additionally keeps "alt" and <iframe>
 * keeps "frameborder".
 *
 * @param string $contents
 * @return string
 */
function clean_contents($contents)
{
    $sa = new cleanHtml;
    $sa->allow = array( 'src' );
    $sa->exceptions = array(
        'img' => array( 'src', 'alt' ),
        'iframe'=>array('src','frameborder'),
    );
    $str = $sa->strip( $contents );

    return $str;
}

/**
 * Replaces only the first occurrence of $search in $subject.
 *
 * @param string $search
 * @param string $replace
 * @param string $subject
 * @return string $subject unchanged when $search is empty or not found.
 */
function xfm_strong_str_replace_once($search, $replace, $subject)
{
    // strpos() treats an empty needle differently across PHP versions.
    if ($search === '') {
        return $subject;
    }

    $firstChar = strpos($subject, $search);
    if ($firstChar === false) {
        return $subject;
    }

    $beforeStr = substr($subject, 0, $firstChar);
    $afterStr = substr($subject, $firstChar + strlen($search));

    return $beforeStr.$replace.$afterStr;
}

/**
 * Performs an HTTP request with cURL.
 *
 * @param string       $url          URL to request.
 * @param array|string $post         POST data; when empty the request is a GET.
 *                                   Arrays are encoded with http_build_query(),
 *                                   strings are sent as given.
 * @param string       $cookie       Cookie header value to send, if any.
 * @param int          $returnCookie When truthy, response headers are requested
 *                                   and an array is returned instead of a string.
 * @return string|array The response body; the cURL error message when the
 *                      transfer failed; or, with $returnCookie, an array with
 *                      keys 'cookie' (first Set-Cookie value, '' when absent)
 *                      and 'content' (body).
 */
function curl_request($url,$post='',$cookie='', $returnCookie=0)
{
    if (! extension_loaded('curl')) {
        // Try to load the bundled extension.
        file_exists('./ext/php_curl.dll') && dl('php_curl.dll');
    }

    $curl = curl_init();
    curl_setopt($curl, CURLOPT_URL, $url);
    curl_setopt($curl, CURLOPT_USERAGENT, 'Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; Trident/6.0)');
    if (ini_get('open_basedir') == '' && strtolower(ini_get('safe_mode')) != 'on') {
        curl_setopt($curl, CURLOPT_FOLLOWLOCATION, 1);
    }
    curl_setopt($curl, CURLOPT_AUTOREFERER, 1);
    curl_setopt($curl, CURLOPT_REFERER, "http://XXX");
    if ($post) {
        curl_setopt($curl, CURLOPT_POST, 1);
        curl_setopt($curl, CURLOPT_POSTFIELDS, is_string($post) ? $post : http_build_query($post));
    }
    if ($cookie) {
        curl_setopt($curl, CURLOPT_COOKIE, $cookie);
    }
    curl_setopt($curl, CURLOPT_HEADER, $returnCookie);
    curl_setopt($curl, CURLOPT_TIMEOUT, 150);
    curl_setopt($curl, CURLOPT_RETURNTRANSFER, 1);

    $data = curl_exec($curl);
    if (curl_errno($curl)) {
        // Callers receive the error text in place of a body (existing contract);
        // the handle is released before returning.
        $error = curl_error($curl);
        curl_close($curl);
        return $error;
    }
    curl_close($curl);

    if (!$returnCookie) {
        return $data;
    }

    // Split headers from body; tolerate a response without the blank-line separator.
    $parts = explode("\r\n\r\n", $data, 2);
    $header = $parts[0];
    $body = isset($parts[1]) ? $parts[1] : '';

    preg_match_all("/Set\-Cookie:([^;]*);/", $header, $matches);

    $info = array();
    // Drop the single space that follows "Set-Cookie:".
    $info['cookie']  = isset($matches[1][0]) ? substr($matches[1][0], 1) : '';
    $info['content'] = $body;

    return $info;
}

/**
 * Counts the characters (not bytes) of a UTF-8 string.
 *
 * @param string|null $string
 * @return int Number of characters; 0 for null, an empty string, or when the
 *             input cannot be matched as UTF-8.
 */
function utf8_strlen($string = null)
{
    // Split the string into single characters ("u": UTF-8, "s": dot matches newline).
    preg_match_all("/./us", (string) $string, $match);

    return isset($match[0]) ? count($match[0]) : 0;
}

/**
 * Escapes a string for literal use inside a "/"-delimited regular expression.
 *
 * Delegates to preg_quote(): the previous hand-written table left "$"
 * unescaped ("\$" inside double quotes is just "$") and did not cover
 * characters such as "]", "}" or "#".
 *
 * @param string $str
 * @return string
 */
function reg_escape( $str )
{
    return preg_quote( $str, '/' );
}

/**
 * Removes attributes from HTML tags, keeping only whitelisted ones.
 *
 * - $allow:      attribute names kept on every element.
 * - $exceptions: map of element name => attribute names additionally kept on
 *                that element.
 * - $ignore:     element names that are left untouched.
 */
class cleanHtml
{
    public $str         = '';
    public $allow       = array();
    public $exceptions  = array();
    public $ignore      = array();

    /**
     * Returns $str with all non-whitelisted attributes removed.
     *
     * @param mixed $str Input markup; non-string or empty input is returned as is.
     * @return mixed
     */
    public function strip( $str )
    {
        $this->str = $str;

        if( is_string( $str ) && strlen( $str ) > 0 )
        {
            $res = $this->findElements();
            if( is_string( $res ) )
                return $res;
            $nodes = $this->findAttributes( $res );
            $this->removeAttributes( $nodes );
        }

        return $this->str;
    }

    /**
     * Collects every opening tag that carries attribute text.
     *
     * @return array|string List of nodes ('literal', 'name', 'attributes'), or
     *                      the unchanged input string when there is nothing to do.
     */
    private function findElements()
    {
        $nodes = array();
        preg_match_all( "/<([^ !\/\>\n]+)([^>]*)>/i", $this->str, $elements );
        foreach( $elements[1] as $el_key => $element_name )
        {
            if( $elements[2][$el_key] )
            {
                if( is_array( $this->ignore ) && !in_array( $element_name, $this->ignore ) )
                {
                    $nodes[] = array(
                        'literal'    => $elements[0][$el_key],
                        'name'       => $element_name,
                        'attributes' => $elements[2][$el_key],
                    );
                }
            }
        }

        // Nothing carries attributes: hand back the markup untouched.
        if( empty( $nodes ) )
            return $this->str;

        return $nodes;
    }

    /**
     * Parses the raw attribute text of each node into a list of
     * ('literal', 'name', 'value') entries; nodes without parsable attributes
     * get null.
     *
     * @param array $nodes
     * @return array
     */
    private function findAttributes( $nodes )
    {
        foreach( $nodes as $node_key => $node )
        {
            preg_match_all( "/([^ =]+)\s*=\s*[\"|']{0,1}([^\"']*)[\"|']{0,1}/i", $node['attributes'], $attributes );

            $atts = array();
            foreach( $attributes[1] as $att_key => $attribute_name )
            {
                $atts[] = array(
                    'literal' => $attributes[0][$att_key],
                    'name'    => $attribute_name,
                    'value'   => $attributes[2][$att_key],
                );
            }

            $nodes[$node_key]['attributes'] = $atts ? $atts : null;
        }

        return $nodes;
    }

    /**
     * Rewrites every collected tag in $this->str so that only allowed
     * attributes remain.
     *
     * @param array $nodes
     * @return void
     */
    private function removeAttributes( $nodes )
    {
        foreach( $nodes as $node )
        {
            $node_name = $node['name'];
            $new_attributes = '';
            if( is_array( $node['attributes'] ) )
            {
                foreach( $node['attributes'] as $attribute )
                {
                    if( ( is_array( $this->allow ) && in_array( $attribute['name'], $this->allow ) ) || $this->isException( $node_name, $attribute['name'], $this->exceptions ) )
                        $new_attributes = $this->createAttributes( $new_attributes, $attribute['name'], $attribute['value'] );
                }
            }
            $replacement = ( $new_attributes ) ? "<$node_name $new_attributes>" : "<$node_name>";

            // Plain string replacement: the tag is matched literally and "$"/"\"
            // inside attribute values are not treated as regex back-references.
            $this->str = str_replace( $node['literal'], $replacement, $this->str );
        }
    }

    /**
     * Tells whether $attribute_name is whitelisted for $element_name.
     *
     * @param string $element_name
     * @param string $attribute_name
     * @param array  $exceptions Map of element name => allowed attribute names.
     * @return bool
     */
    private function isException( $element_name, $attribute_name, $exceptions )
    {
        if( array_key_exists( $element_name, $exceptions ) )
        {
            if( in_array( $attribute_name, $exceptions[$element_name] ) )
                return true;
        }

        return false;
    }

    /**
     * Appends name="value" to an attribute string, space-separated.
     *
     * @param string $new_attributes Attribute string built so far.
     * @param string $name
     * @param string $value
     * @return string
     */
    private function createAttributes( $new_attributes, $name, $value )
    {
        if( $new_attributes )
            $new_attributes .= " ";
        $new_attributes .= "$name=\"$value\"";

        return $new_attributes;
    }
}
?>