first commit

This commit is contained in:
LittleBoy 2020-09-30 19:56:27 +08:00
commit 7e53d0ed0c
15 changed files with 119034 additions and 0 deletions

22
.gitignore vendored Normal file
View File

@ -0,0 +1,22 @@
.DS_Store
*.bak
vendor
commit.sh
# local env files
.env
# runtime files
runtime/log
runtime/session
*php_errors*
*nohup*
# Editor directories and files
.idea
.vscode
*.suo
*.ntvs*
*.njsproj
*.sln
*.sw?

76
app/Api.php Normal file
View File

@ -0,0 +1,76 @@
<?php
namespace app;
use Fukuball\Jieba\Finalseg;
use Fukuball\Jieba\Jieba;
use Workerman\Protocols\Http\Request;
use Workerman\Protocols\Http\Response;
/**
 * Base class for every HTTP endpoint: provides logging, the JSON response
 * envelope helpers and the shared one-time Jieba initialisation.
 */
abstract class Api
{
    /** @var bool True once initOnce() has loaded the Jieba dictionaries in this process. */
    private static $init = false;

    /**
     * Handles one HTTP request for the route this endpoint is registered on.
     *
     * @param Request $request
     * @return Response
     */
    public abstract function handle(Request $request);

    /**
     * Loads the Jieba "small" dictionary and the Finalseg model the first time
     * it is called; later calls return immediately.
     */
    public static function initOnce()
    {
        if (self::$init) {
            return;
        }
        echo "[info]", date('H:i:s'), " start load jieba dictionary ...\n";
        Jieba::init(['mode' => 'default', 'dict' => 'small']);
        Finalseg::init();
        echo "[info]", date('H:i:s'), " loaded jieba dictionary!\n";
        self::$init = true;
    }

    /**
     * Writes one timestamped line to standard output.
     *
     * @param string $string message text
     * @param string|null $tag label printed in square brackets before the time
     */
    protected function log($string, $tag = "info")
    {
        echo "[{$tag}]", date('H:i:s'), " {$string} \n";
    }

    /**
     * Builds a success envelope: {"code": 0, "data": ...}.
     *
     * @param mixed $data
     * @return Response
     */
    protected function success($data)
    {
        return $this->json(['code' => 0, 'data' => $data]);
    }

    /**
     * Builds an error envelope: {"code": <code>, "message": <message>}.
     *
     * @param string $message
     * @param int $code
     * @return Response
     */
    protected function error($message = '', $code = -1)
    {
        return $this->json(['code' => $code, 'message' => $message]);
    }

    /**
     * Sends a JSON response with permissive CORS headers.
     *
     * @param mixed $data a string is sent verbatim; any other value is JSON-encoded
     *                    with unescaped Unicode
     * @return Response
     */
    public function json($data)
    {
        if (!is_string($data)) {
            $encoded = json_encode($data, JSON_UNESCAPED_UNICODE);
            if ($encoded === false) {
                // json_encode() returns false on e.g. malformed UTF-8; without this
                // check the client would receive an empty body with no explanation.
                $reason = json_last_error_msg();
                $this->log("json encode failed: {$reason}", 'error');
                $encoded = json_encode([
                    'code' => -1,
                    'message' => "response encode failed: {$reason}",
                ]);
            }
            $data = $encoded;
        }

        // NOTE(review): browsers refuse credentialed requests when the allowed origin
        // is the "*" wildcard - confirm whether Allow-Credentials is actually needed.
        return new Response(200, [
            'Content-Type' => 'application/json',
            'Access-Control-Allow-Origin' => '*',
            'Access-Control-Allow-Credentials' => 'true',
        ], $data);
    }

    /**
     * Sends a plain 200 response with the given body and no extra headers.
     *
     * @param $string
     * @return Response
     */
    public function response($string)
    {
        return new Response(200, [], $string);
    }
}

86
app/Dictionary.php Normal file
View File

@ -0,0 +1,86 @@
<?php
namespace app;
use CcCedict\Entry;
use CcCedict\Parser;
use Workerman\Protocols\Http\Request;
use Workerman\Protocols\Http\Response;
/**
* Class Dictionary
* The word dictionary of this website is based on CC-CEDICT.
* CC-CEDICT is a continuation of the CEDICT project started by Paul Denisowski in 1997 with the aim to provide a complete
* downloadable Chinese to English dictionary with pronunciation in pinyin for the Chinese characters.
* This website allows you to easily add new entries or correct existing entries in CC-CEDICT.
* Submitted entries will be checked and processed frequently and released for download in CEDICT format on this page.
* https://www.mdbg.net/chinese/dictionary?page=cc-cedict
* @package app
*/
class Dictionary extends Api
{
    /** @var array simplified word => list of parsed CC-CEDICT entries for that word */
    private $dictionary = [];

    public function __construct()
    {
        if (empty($this->dictionary)) {
            $this->initDictionary();
        }
    }

    /**
     * Fills $this->dictionary, preferring the JSON cache (files/dict.json) and
     * falling back to parsing the CC-CEDICT source (files/cedict_ts.u8), in which
     * case the cache file is (re)written.
     */
    private function initDictionary()
    {
        $this->log("start load english dictionary ...");
        $file = __DIR__ . '/files/cedict_ts.u8';
        $dict_file = __DIR__ . '/files/dict.json';

        $cached = $this->readCache($dict_file);
        if ($cached !== null) {
            $this->dictionary = $cached;
            $this->log("loaded dictionary!");
            return;
        }

        $parser = new Parser();
        $parser->setOptions([
            Entry::F_SIMPLIFIED,
            Entry::F_PINYIN_DIACRITIC,
            Entry::F_ENGLISH_EXPANDED,
        ]);
        $parser->setFilePath($file);

        $dict = [];
        $this->log("start parse dictionary ...");
        foreach ($parser->parse() as $output) {
            foreach ($output['parsedLines'] as $line) {
                // Several entries can share one simplified form, so each key holds a list.
                $dict[$line['simplified']][] = $line;
            }
        }

        if (file_put_contents($dict_file, json_encode($dict, JSON_UNESCAPED_UNICODE)) === false) {
            // The in-memory dictionary is still usable; only the next start-up is slower.
            $this->log("cannot write dictionary cache {$dict_file}", 'warn');
        }
        $this->dictionary = $dict;
        $this->log("load english dictionary finish!");
    }

    /**
     * Reads the JSON cache file.
     *
     * @param string $path
     * @return array|null the decoded dictionary, or null when the cache is missing,
     *                    unreadable or does not decode to an array
     */
    private function readCache($path)
    {
        if (!file_exists($path)) {
            return null;
        }
        $raw = file_get_contents($path);
        $decoded = $raw === false ? null : json_decode($raw, true);
        if (!is_array($decoded)) {
            $this->log("dictionary cache {$path} is unreadable, rebuilding", 'warn');
            return null;
        }
        return $decoded;
    }

    /**
     * Looks up the "word" query parameter in the dictionary.
     *
     * @param Request $request
     * @return Response success envelope with the entries when the word is known,
     *                  an error envelope when "word" is missing or not a string,
     *                  otherwise a bare empty JSON array
     * @throws \Exception
     */
    public function handle(Request $request)
    {
        $word = $request->get('word');
        // A query such as ?word[]=x yields an array, which is not a valid array key.
        if (!is_string($word) || empty($word)) {
            return $this->error('need query word');
        }
        if (isset($this->dictionary[$word])) {
            return $this->success($this->dictionary[$word]);
        }
        // NOTE(review): unknown words get a bare [] rather than the usual
        // {"code":..,"data":..} envelope - kept as-is for existing clients.
        return $this->json([]);
    }
}

32
app/Fenci.php Normal file
View File

@ -0,0 +1,32 @@
<?php
namespace app;
use Fukuball\Jieba\Jieba;
use Workerman\Protocols\Http\Request;
use Workerman\Protocols\Http\Response;
/**
 * Word segmentation endpoint: splits the "text" query parameter with Jieba.
 * Class Fenci
 * @package app
 */
class Fenci extends Api
{
    public function __construct()
    {
        // Make sure the Jieba dictionaries are loaded before the first request.
        self::initOnce();
    }

    /**
     * Segments the "text" query parameter (a built-in sample sentence is used
     * when the parameter is absent).
     *
     * @param Request $request
     * @return Response success envelope holding the list of segments, or an error
     *                  envelope when "text" is not a string
     */
    public function handle(Request $request)
    {
        $text = $request->get('text', "我要分词");
        // ?text[]=... arrives as an array, which Jieba::cut() cannot segment.
        if (!is_string($text)) {
            return $this->error('text must be a string');
        }

        return $this->success(Jieba::cut($text));
    }
}

42
app/FenciArray.php Normal file
View File

@ -0,0 +1,42 @@
<?php
namespace app;
use Fukuball\Jieba\Jieba;
use Workerman\Protocols\Http\Request;
use Workerman\Protocols\Http\Response;
/**
 * Batch word segmentation endpoint: segments every element of the POSTed
 * "text" JSON array with Jieba.
 */
class FenciArray extends Api
{
    public function __construct()
    {
        // Make sure the Jieba dictionaries are loaded before the first request.
        self::initOnce();
    }

    /**
     * Segments each element of the "text" POST field.
     *
     * @param Request $request "text" must be a JSON-encoded array of strings
     *                         (an already-decoded array is accepted as well)
     * @return Response success envelope whose data keeps the input keys and maps
     *                  each one to its list of segments; error envelope when
     *                  "text" is empty or not an array
     */
    public function handle(Request $request)
    {
        $textArray = $request->post('text');
        if (is_string($textArray)) {
            $textArray = json_decode($textArray, true);
        }
        // Scalars such as 5 or "abc" are valid JSON but not the array this API requires.
        if (empty($textArray) || !is_array($textArray)) {
            return $this->error('参数text不可为空且必须为json数组');
        }

        foreach ($textArray as $i => $text) {
            if (is_array($text)) {
                // Nested structures cannot be segmented; use the same fallback shape
                // as a failed cut so the response stays aligned with the input.
                $this->log("cut element {$i} skipped,reason:not a string");
                $textArray[$i] = [$text];
                continue;
            }
            $sentence = (string)$text;
            try {
                $textArray[$i] = Jieba::cut($sentence);
            } catch (\Throwable $e) {
                // \Throwable also covers \Error (e.g. TypeError), not only \Exception.
                $this->log("cut {$sentence} error,reason:" . $e->getMessage());
                $textArray[$i] = [$text];
            }
        }

        return $this->success($textArray);
    }
}

22
app/Info.php Normal file
View File

@ -0,0 +1,22 @@
<?php
namespace app;
use Workerman\Protocols\Http\Request;
use Workerman\Protocols\Http\Response;
/**
 * Placeholder endpoint that answers with a fixed JSON list.
 */
class Info extends Api
{
    /**
     * Replies with the JSON array ["xxx"]; the request itself is not inspected.
     *
     * @param Request $request
     * @return Response
     */
    public function handle(Request $request)
    {
        $payload = ['xxx'];

        return $this->json($payload);
    }
}

21
app/Pinyin.php Normal file
View File

@ -0,0 +1,21 @@
<?php
namespace app;
use Workerman\Protocols\Http\Request;
use Workerman\Protocols\Http\Response;
/**
 * Pinyin endpoint - not implemented yet.
 */
class Pinyin extends Api
{
    /**
     * Always answers with an error envelope: the conversion logic has not been
     * written, and returning nothing would violate the Response contract.
     *
     * @param Request $request
     * @return Response
     */
    public function handle(Request $request)
    {
        return $this->error('pinyin api is not implemented');
    }
}

118254
app/files/cedict_ts.u8 Normal file

File diff suppressed because it is too large Load Diff

1
app/files/dict.json Normal file

File diff suppressed because one or more lines are too long

55
app/helper/Router.php Normal file
View File

@ -0,0 +1,55 @@
<?php
namespace app\helper;
use app\Api;
use Workerman\Protocols\Http\Request;
use Workerman\Protocols\Http\Response;
/**
 * Static path => handler-class registry that dispatches requests to lazily
 * created Api instances.
 */
class Router
{
    /** @var array request path => handler class name */
    private static $Routes = [];

    /** @var array handler class name => instance, created on first use and then reused */
    private static $Instance = [];

    private function __construct()
    {
    }

    /**
     * Registers (or replaces) the handler class for an exact request path.
     *
     * @param $path
     * @param $handler
     */
    public static function add($path, $handler)
    {
        self::$Routes[$path] = $handler;
    }

    /**
     * Dispatches a request to the handler registered for its path.
     *
     * @param Request $request
     * @return Response the handler's response (non-Response results are wrapped in a
     *                  200), a 500 carrying the error message when the handler throws,
     *                  or a 404 when no Api handler is registered for the path
     */
    public static function process(Request $request)
    {
        $path = $request->path();
        // 24-hour clock, matching every other log line in the application.
        echo "[INFO]", date('H:i:s'), " request {$path} \n";

        if (!isset(self::$Routes[$path])) {
            return new Response(404, [], '404');
        }

        try {
            $handler = self::$Routes[$path];
            if (!isset(self::$Instance[$handler])) {
                echo "[INFO]", date('H:i:s'), " initial instance of {$handler}\n";
                self::$Instance[$handler] = new $handler();
            }

            $instance = self::$Instance[$handler];
            if ($instance instanceof Api) {
                $result = $instance->handle($request);

                return $result instanceof Response ? $result : new Response(200, [], $result);
            }
        } catch (\Throwable $e) {
            // \Throwable rather than \Exception: a TypeError or other \Error raised by
            // a handler must produce a 500 instead of escaping the request callback.
            return new Response(500, [], $e->getMessage());
        }

        // Registered class is not an Api implementation: treat as not found.
        return new Response(404, [], '404');
    }
}

21
composer.json Normal file
View File

@ -0,0 +1,21 @@
{
"name": "yaclty2/fenci",
"authors": [
{
"name": "callmeyan",
"email": "yaclty2@gmail.com"
}
],
"require": {
"workerman/workerman": "^4.0",
"fukuball/jieba-php": "^0.33.0",
"overtrue/pinyin": "^4.0",
"mdsills/cccedict": "dev-master",
"catfan/medoo": "^1.7"
},
"autoload": {
"psr-4": {
"app\\":"app"
}
}
}

305
composer.lock generated Normal file
View File

@ -0,0 +1,305 @@
{
"_readme": [
"This file locks the dependencies of your project to a known state",
"Read more about it at https://getcomposer.org/doc/01-basic-usage.md#composer-lock-the-lock-file",
"This file is @generated automatically"
],
"content-hash": "83604bb0176e2f24ea7130d7e4e4c2f9",
"packages": [
{
"name": "catfan/medoo",
"version": "v1.7.10",
"source": {
"type": "git",
"url": "https://github.com/catfan/Medoo.git",
"reference": "2d675f73e23f63bbaeb9a8aa33318659a3d3c32f"
},
"dist": {
"type": "zip",
"url": "https://api.github.com/repos/catfan/Medoo/zipball/2d675f73e23f63bbaeb9a8aa33318659a3d3c32f",
"reference": "2d675f73e23f63bbaeb9a8aa33318659a3d3c32f",
"shasum": "",
"mirrors": [
{
"url": "https://mirrors.aliyun.com/composer/dists/%package%/%reference%.%type%",
"preferred": true
}
]
},
"require": {
"ext-pdo": "*",
"php": ">=5.4"
},
"suggest": {
"ext-pdo_dblib": "For MSSQL or Sybase database on Linux/UNIX platform",
"ext-pdo_mysql": "For MySQL or MariaDB database",
"ext-pdo_oci": "For Oracle database",
"ext-pdo_oci8": "For Oracle version 8 database",
"ext-pdo_pqsql": "For PostgreSQL database",
"ext-pdo_sqlite": "For SQLite database",
"ext-pdo_sqlsrv": "For MSSQL database on both Window/Liunx platform"
},
"type": "framework",
"autoload": {
"psr-4": {
"Medoo\\": "/src"
}
},
"notification-url": "https://packagist.org/downloads/",
"license": [
"MIT"
],
"authors": [
{
"name": "Angel Lai",
"email": "angel@catfan.me"
}
],
"description": "The lightweight PHP database framework to accelerate development",
"homepage": "https://medoo.in",
"keywords": [
"database",
"database library",
"lightweight",
"mariadb",
"mssql",
"mysql",
"oracle",
"php framework",
"postgresql",
"sql",
"sqlite"
],
"time": "2020-02-11T08:20:42+00:00"
},
{
"name": "fukuball/jieba-php",
"version": "0.33",
"source": {
"type": "git",
"url": "https://github.com/fukuball/jieba-php.git",
"reference": "a5980dc88ae98a261a0bdb1130be14d0136b373f"
},
"dist": {
"type": "zip",
"url": "https://api.github.com/repos/fukuball/jieba-php/zipball/a5980dc88ae98a261a0bdb1130be14d0136b373f",
"reference": "a5980dc88ae98a261a0bdb1130be14d0136b373f",
"shasum": "",
"mirrors": [
{
"url": "https://mirrors.aliyun.com/composer/dists/%package%/%reference%.%type%",
"preferred": true
}
]
},
"require": {
"php": ">= 5.3"
},
"require-dev": {
"phpunit/phpunit": "4.4.1",
"satooshi/php-coveralls": "dev-master",
"squizlabs/php_codesniffer": "1.5.6"
},
"type": "library",
"autoload": {
"files": [
"src/class/Jieba.php",
"src/class/JiebaAnalyse.php",
"src/class/Finalseg.php",
"src/class/Posseg.php",
"src/vendor/multi-array/MultiArray.php",
"src/vendor/multi-array/Factory/MultiArrayFactory.php"
]
},
"notification-url": "https://packagist.org/downloads/",
"license": [
"MIT"
],
"authors": [
{
"name": "fukuball",
"email": "fukuball@gmail.com"
}
],
"description": "結巴中文分詞PHP 版本):做最好的 PHP 中文分詞、中文斷詞組件",
"keywords": [
"Jieba",
"php"
],
"time": "2017-11-22T10:02:12+00:00"
},
{
"name": "mdsills/cccedict",
"version": "dev-master",
"source": {
"type": "git",
"url": "https://github.com/mdsills/cccedict.git",
"reference": "494784f905a9e0572c539efa6d277ed48dfef1f7"
},
"dist": {
"type": "zip",
"url": "https://api.github.com/repos/mdsills/cccedict/zipball/494784f905a9e0572c539efa6d277ed48dfef1f7",
"reference": "494784f905a9e0572c539efa6d277ed48dfef1f7",
"shasum": "",
"mirrors": [
{
"url": "https://mirrors.aliyun.com/composer/dists/%package%/%reference%.%type%",
"preferred": true
}
]
},
"require": {
"php": ">=7.0"
},
"type": "library",
"autoload": {
"psr-4": {
"CcCedict\\": "src/"
}
},
"notification-url": "https://packagist.org/downloads/",
"license": [
"MIT"
],
"description": "Parser for CC-CEDICT Chinese-English dictionary",
"homepage": "https://github.com/mdsills/cccedict",
"keywords": [
"Chinese",
"cc-cedict",
"dictionary"
],
"time": "2018-03-27T16:22:56+00:00"
},
{
"name": "overtrue/pinyin",
"version": "4.0.6",
"source": {
"type": "git",
"url": "https://github.com/overtrue/pinyin.git",
"reference": "9836dae1783bcf5934508a2abe32eca9ea5ee15d"
},
"dist": {
"type": "zip",
"url": "https://api.github.com/repos/overtrue/pinyin/zipball/9836dae1783bcf5934508a2abe32eca9ea5ee15d",
"reference": "9836dae1783bcf5934508a2abe32eca9ea5ee15d",
"shasum": "",
"mirrors": [
{
"url": "https://mirrors.aliyun.com/composer/dists/%package%/%reference%.%type%",
"preferred": true
}
]
},
"require": {
"php": ">=7.1"
},
"require-dev": {
"brainmaestro/composer-git-hooks": "^2.7",
"friendsofphp/php-cs-fixer": "^2.16",
"phpunit/phpunit": "~8.0"
},
"type": "library",
"extra": {
"hooks": {
"pre-commit": [
"composer test",
"composer fix-style"
],
"pre-push": [
"composer test",
"composer check-style"
]
}
},
"autoload": {
"psr-4": {
"Overtrue\\Pinyin\\": "src/"
},
"files": [
"src/const.php"
]
},
"notification-url": "https://packagist.org/downloads/",
"license": [
"MIT"
],
"authors": [
{
"name": "overtrue",
"email": "anzhengchao@gmail.com",
"homepage": "http://github.com/overtrue"
}
],
"description": "Chinese to pinyin translator.",
"homepage": "https://github.com/overtrue/pinyin",
"keywords": [
"Chinese",
"Pinyin",
"cn2pinyin"
],
"time": "2020-04-13T08:53:30+00:00"
},
{
"name": "workerman/workerman",
"version": "v4.0.4",
"source": {
"type": "git",
"url": "https://github.com/walkor/Workerman.git",
"reference": "88e518903d2f61d6cdfe1d381b40e03be2a9ef94"
},
"dist": {
"type": "zip",
"url": "https://api.github.com/repos/walkor/Workerman/zipball/88e518903d2f61d6cdfe1d381b40e03be2a9ef94",
"reference": "88e518903d2f61d6cdfe1d381b40e03be2a9ef94",
"shasum": "",
"mirrors": [
{
"url": "https://mirrors.aliyun.com/composer/dists/%package%/%reference%.%type%",
"preferred": true
}
]
},
"require": {
"php": ">=5.3"
},
"suggest": {
"ext-event": "For better performance. "
},
"type": "library",
"autoload": {
"psr-4": {
"Workerman\\": "./"
}
},
"notification-url": "https://packagist.org/downloads/",
"license": [
"MIT"
],
"authors": [
{
"name": "walkor",
"email": "walkor@workerman.net",
"homepage": "http://www.workerman.net",
"role": "Developer"
}
],
"description": "An asynchronous event driven PHP framework for easily building fast, scalable network applications.",
"homepage": "http://www.workerman.net",
"keywords": [
"asynchronous",
"event-loop"
],
"time": "2020-04-20T02:20:21+00:00"
}
],
"packages-dev": [],
"aliases": [],
"minimum-stability": "stable",
"stability-flags": {
"mdsills/cccedict": 20
},
"prefer-stable": false,
"prefer-lowest": false,
"platform": [],
"platform-dev": []
}

35
index.php Normal file
View File

@ -0,0 +1,35 @@
<?php
// Entry point: boots a Workerman HTTP server and routes every request through
// app\helper\Router.
date_default_timezone_set("PRC");
ini_set('memory_limit', '1024M');
mb_internal_encoding('UTF-8');
include 'vendor/autoload.php';
include 'routes.php';
use Workerman\Worker;
Worker::$stdoutFile = __DIR__ . '/stdout.log';
// Create a Worker that speaks HTTP on port 10008 (all interfaces).
$http_worker = new Worker("http://0.0.0.0:10008");
$http_worker->name = 'Http Server Worker';
// Serve requests with 2 worker processes.
$http_worker->count = 2;
// Answer each incoming request and attach timing / memory diagnostics.
$http_worker->onMessage = function ($connection, $request) {
    $startedAt = microtime(true);
    $response = \app\helper\Router::process($request);
    // microtime(true) is in seconds; convert so the value matches the "ms" unit.
    $elapsedMs = round((microtime(true) - $startedAt) * 1000, 3);
    $memoryMb = memory_get_usage() / (1.0 * 1024 * 1024);
    $response->withHeaders([
        'X-Time' => $elapsedMs . 'ms',
        'X-Memory' => ceil($memoryMb) . 'MB',
    ]);
    $connection->send($response);
};
// Start the worker(s); this call blocks.
Worker::runAll();

6
routes.php Normal file
View File

@ -0,0 +1,6 @@
<?php
// Route table: exact request path => Api handler class.
\app\helper\Router::add("/fenci", \app\Fenci::class);
\app\helper\Router::add("/fenci-array", \app\FenciArray::class);
\app\helper\Router::add("/pinyin", \app\Pinyin::class);
\app\helper\Router::add("/dict", \app\Dictionary::class);
// /info belongs to the Info handler (it was previously wired to Pinyin).
\app\helper\Router::add("/info", \app\Info::class);

56
to_db.php Normal file
View File

@ -0,0 +1,56 @@
<?php
// One-off CLI import: copies app/files/dict.json into the MySQL `dictionary`
// table, merging entries that share the same word and (case-insensitive) pinyin.
ini_set('memory_limit', '1024M');
mb_internal_encoding('UTF-8');
include 'vendor/autoload.php';

$dictPath = __DIR__ . '/app/files/dict.json';
$raw = file_get_contents($dictPath);
$words = $raw === false ? null : json_decode($raw, true);
if (!is_array($words)) {
    // Without this check a missing or corrupt file would only surface as a foreach warning.
    fwrite(STDERR, "cannot load dictionary from {$dictPath}\n");
    exit(1);
}

// Connection settings can be overridden through the environment; the fallbacks
// are the values that used to be hard-coded here.
$db = new \Medoo\Medoo([
    'database_type' => 'mysql',
    'database_name' => getenv('DB_NAME') ?: 'youtube_zimu',
    'server' => getenv('DB_HOST') ?: '127.0.0.1',
    'username' => getenv('DB_USER') ?: 'root',
    'password' => getenv('DB_PASSWORD') ?: '123456',
    'charset' => 'utf8mb4',
]);

$t1 = microtime(true);
$count = 0;

foreach ($words as $entries) {
    // Rows for the current word, keyed by word + lower-cased pinyin.
    $insertData = [];
    foreach ($entries as $data) {
        // mb_strtolower: the pinyin carries tone diacritics, which the byte-wise
        // strtolower() leaves untouched, so capitalised variants would not merge.
        $cw = $data['simplified'] . mb_strtolower($data['pinyinDiacritic']);
        if (isset($insertData[$cw])) {
            // Same word and reading seen before: append the extra translations.
            $insertData[$cw]['english'] = array_merge($insertData[$cw]['english'], $data['englishExpanded']);
            continue;
        }
        $count++;
        echo $count . "." . $cw . "\n";
        $insertData[$cw] = [
            'word' => $data['simplified'],
            'pinyin' => $data['pinyinDiacritic'],
            'english' => $data['englishExpanded'],
        ];
    }
    foreach ($insertData as $rowData) {
        $rowData['english'] = json_encode($rowData['english'], JSON_UNESCAPED_UNICODE);
        $db->insert('dictionary', $rowData);
    }
}

$t2 = microtime(true);
// microtime(true) is in seconds; convert so the printed value matches its "ms" label.
$time = round(($t2 - $t1) * 1000, 3);
$mem = round(memory_get_usage() / (1.0 * 1024 * 1024), 2);
echo "time:{$time}ms memory:{$mem}mb";