您好,欢迎来到三六零分类信息网!老站,搜索引擎当天收录,欢迎发信息

PHP敏感词过滤

2024/3/20 8:23:53发布26次查看
/**
 * Banned-word filter.
 *
 * Banned words are loaded in id-range batches. Each batch is turned into a
 * character tree (one array level per character, with a 'type' entry marking
 * the end of a word) that is cached through the rds wrapper for 300 seconds.
 * The text is then scanned against every batch.
 *
 * Performance (original author's note): about 0.05 seconds per article.
 *
 * @author liuxu
 */
class logic_blackword
{
    const app_forum = 1;
    const app_blog = 2;
    const app_vote = 3;

    /**
     * Finds every banned word that occurs in the text.
     *
     * @param string $txt Raw text; HTML tags and every character that is not
     *                    a letter, digit or CJK ideograph are ignored.
     * @return array Map of word type => list of matched words (as strings).
     */
    public function gethitlist($txt)
    {
        $hitlist = array();

        // Filter against the banned words batch by batch.
        $max = $this->getmax();
        if ($max) {
            // The normalised text is the same for every batch, so build it once.
            $clean = $this->normalizetext($txt);
            $size = 1000;
            $last = (int) ceil($max / $size);
            for ($page = 1; $page <= $last; $page++) {
                $result = $this->gethitlistbypage($clean, $page, $size);
                // Copy key by key instead of array_merge(): array_merge()
                // renumbers integer-like keys, which would turn an all-digit
                // word such as "110" into 0, 1, 2, ...
                foreach ($result as $hit => $type) {
                    $hitlist[$hit] = $type;
                }
            }
        }

        // Regroup "word => type" into "type => list of words".
        $hitlist2 = array();
        foreach ($hitlist as $hit => $type) {
            // PHP stores all-digit keys as integers; hand the word back as text.
            $hitlist2[$type][] = (string) $hit;
        }

        return $hitlist2;
    }

    /**
     * Returns the highest banned-word id, cached for 300 seconds.
     *
     * @return mixed The cached value, or the value read from the model
     *               (0 when the model returns no 'max' column).
     */
    private function getmax()
    {
        $redis = rds::factory();
        $memkey = 'blackword_max';
        $max = $redis->get($memkey);
        if ($max === false) {
            // Cache miss: ask the model and remember the answer.
            $max = 0;
            $blackword = new model_blackword_blackword();
            $para = array('field' => 'max(id) as max');
            $result = $blackword->search($para);
            if (isset($result[0]['max'])) {
                $max = $result[0]['max'];
            }
            $redis->setex($memkey, 300, $max);
        }

        return $max;
    }

    /**
     * Reduces the text to the characters that take part in matching.
     *
     * @param string $txt Raw text, possibly containing HTML.
     * @return string Text without tags, keeping only ASCII letters, digits and
     *                characters in U+4E00..U+9FA5; '' when the text cannot be
     *                processed (preg_replace() failed, e.g. invalid UTF-8).
     */
    private function normalizetext($txt)
    {
        $txt = strip_tags((string) $txt);
        $txt = preg_replace('/[^a-za-z0-9\\x{4e00}-\\x{9fa5}]/iu', '', $txt);

        return is_string($txt) ? $txt : '';
    }

    /**
     * Scans the text against one batch of banned words.
     *
     * @param string $txt  Text already reduced by normalizetext().
     * @param int    $page 1-based batch number.
     * @param int    $size Number of ids per batch.
     * @return array Map of matched word => word type.
     */
    private function gethitlistbypage($txt, $page = 1, $size = 1000)
    {
        $hitlist = array();

        // Load the word tree for this batch.
        $wordtree = $this->getwordtreebypage($page, $size);
        if (!$wordtree) {
            return $hitlist;
        }

        $len = mb_strlen($txt, 'utf-8');
        for ($i = 0; $i < $len; $i++) {
            $char = mb_substr($txt, $i, 1, 'utf-8');
            if (isset($wordtree[$char])) {
                // Only the next 50 characters are examined, which caps the
                // length of a word that can be detected.
                $result = $this->gethitlistbytree(mb_substr($txt, $i, 50, 'utf-8'), $wordtree);
                foreach ($result as $hit => $type) {
                    $hitlist[$hit] = $type;
                }
            }
        }

        return $hitlist;
    }

    /**
     * Collects every banned word that is a prefix of the given text.
     *
     * @param string $txt      Text whose first character starts the match.
     * @param array  $wordtree Word tree; taken by reference and only read.
     * @return array Map of matched word => word type.
     */
    private function gethitlistbytree($txt, &$wordtree)
    {
        $len = mb_strlen($txt, 'utf-8');
        // Walk the tree through references rather than by value assignment.
        $point = & $wordtree;
        $hit = '';
        $hitlist = array();
        for ($i = 0; $i < $len; $i++) {
            $char = mb_substr($txt, $i, 1, 'utf-8');
            if (!isset($point[$char])) {
                // No banned word continues with this character.
                break;
            }
            $hit .= $char;
            $point = & $point[$char];
            if (isset($point['type'])) {
                // A complete word ends here; keep walking for longer words.
                $hitlist[$hit] = $point['type'];
            }
        }

        return $hitlist;
    }

    /**
     * Builds the word tree for one batch of banned words, cached for 300 seconds.
     *
     * @param int $page 1-based batch number.
     * @param int $size Number of ids per batch.
     * @return array Nested arrays keyed by character; a 'type' entry marks the
     *               end of a word and holds that word's type.
     */
    private function getwordtreebypage($page = 1, $size = 1000)
    {
        $redis = rds::factory();
        $memkey = 'blackword_tree_' . $page . '_' . $size;
        $wordtree = $redis->get($memkey);
        if ($wordtree === false) {
            $wordtree = array();
            $blackword = new model_blackword_blackword();
            // Integer casts keep the hand-built condition free of anything but digits.
            $start = ((int) $page - 1) * (int) $size;
            $end = $start + (int) $size;
            // NOTE(review): batch covers ids in (start, end]; the inclusive
            // upper bound is what lets the last batch reach max(id) — confirm
            // against the model's expectations.
            $para = array('where' => 'status=1 and id>' . $start . ' and id<=' . $end);
            $result = $blackword->search($para);
            if ($result) {
                foreach ($result as $value) {
                    if (!$value['word']) {
                        continue;
                    }
                    // Split the word into UTF-8 characters.
                    $chars = preg_split('//u', $value['word'], -1, PREG_SPLIT_NO_EMPTY);
                    if (!$chars) {
                        // Not valid UTF-8 (or nothing to store): skip the row.
                        continue;
                    }
                    // Descend one level per character, creating levels on demand.
                    $point = & $wordtree;
                    foreach ($chars as $char) {
                        $point = & $point[$char];
                    }
                    $point['type'] = $value['type'];
                    // Drop the reference so it cannot leak into later code.
                    unset($point);
                }
            }

            $redis->setex($memkey, 300, $wordtree);
        }

        return $wordtree;
    }
}
复制代码
php
该用户其它信息

VIP推荐

免费发布信息,免费发布B2B信息网站平台 - 三六零分类信息网 沪ICP备09012988号-2
企业名录 Product