1
0
mirror of https://gitee.com/koogua/course-tencent-cloud.git synced 2025-06-22 19:44:02 +08:00
2020-05-07 20:18:37 +08:00

70 lines
1.5 KiB
PHP
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

<?php
namespace App\Library\Utils;
class Word
{
const CHINESE_PATTERN = '/[\x80-\xff]{1,3}/';
public static function getWordCount($str)
{
$chineseWordCount = self::getChineseWordCount($str);
$str = self::filterChineseWords($str);
$englishWordCount = self::getEnglishWordCount($str);
$count = $chineseWordCount + $englishWordCount;
return (int)$count;
}
public static function getWordDuration($str)
{
$count = self::getWordCount($str);
$duration = $count * 0.8;
return (int)$duration;
}
public static function getChineseWordCount($str)
{
$str = strip_tags($str);
$str = self::filterChineseSymbols($str);
preg_replace(self::CHINESE_PATTERN, '', $str, -1, $count);
return (int)$count;
}
public static function getEnglishWordCount($str)
{
$str = strip_tags($str);
$count = str_word_count($str);
return (int)$count;
}
public static function filterChineseWords($str)
{
return preg_replace(self::CHINESE_PATTERN, '', $str);
}
public static function filterChineseSymbols($str)
{
$search = [
'', '', '〈', '〉', '《', '》', '「', '」',
'『', '』', '﹃', '﹄', '', '', '…', '—',
'', '', '¥', '、', '【', '】', '', '。',
'', '', '', '', '“ ', '”', '', '',
];
return str_replace($search, '', $str);
}
}