fenci/to_db.php
2020-09-30 19:56:27 +08:00

57 lines
1.7 KiB
PHP

<?php
// Import script setup: raises the memory limit, loads the dictionary JSON,
// opens the database connection and initialises the run counters used by the
// import loop and the final timing report.

// The whole dictionary is decoded into memory at once, hence the raised limit.
ini_set('memory_limit', '1024M');
mb_internal_encoding('UTF-8');
include 'vendor/autoload.php';

// Fail fast when the dictionary is missing or malformed. Without these checks
// $words would be null/false, the import loop would only emit a warning and
// the script would finish "successfully" without importing anything.
$dictPath = __DIR__ . '/app/files/dict.json';
$dictJson = file_get_contents($dictPath);
if ($dictJson === false) {
    throw new RuntimeException("Unable to read dictionary file: {$dictPath}");
}

$words = json_decode($dictJson, true);
if (!is_array($words)) {
    throw new RuntimeException(
        "Invalid dictionary JSON in {$dictPath}: " . json_last_error_msg()
    );
}
unset($dictJson); // release the raw JSON text; only the decoded array is needed

// NOTE(review): credentials are hard-coded (root / trivial password). Consider
// moving them to environment variables or an untracked config file.
$db = new \Medoo\Medoo([
    'database_type' => 'mysql',
    'database_name' => 'youtube_zimu',
    'server' => '127.0.0.1',
    'username' => 'root',
    'password' => '123456',
    'charset' => 'utf8mb4'
]);

// Start of the timed section and running count of unique entries.
$t1 = microtime(true);
$count = 0;
// Import every dictionary group. Within one group, entries that share the
// same simplified word and the same pinyin (compared case-insensitively) are
// collapsed into a single row whose english list is the concatenation of all
// their definitions. Each resulting row is inserted into `dictionary`.
foreach ($words as $ch) {
    // Pending rows for this group, keyed by word + lower-cased pinyin.
    $insertData = [];

    foreach ($ch as $data) {
        // Multibyte-safe lower-casing: plain strtolower() works on bytes and
        // (on PHP 8+) only folds ASCII letters, so pinyin that differs only in
        // the case of an accented letter would not be recognised as a duplicate.
        $cw = $data['simplified'] . mb_strtolower($data['pinyinDiacritic'], 'UTF-8');

        // Key lookup replaces the former loose in_array() scan: it is exact
        // (no numeric-string juggling) and constant time.
        if (isset($insertData[$cw])) {
            $insertData[$cw]['english'] = array_merge(
                $insertData[$cw]['english'],
                $data['englishExpanded']
            );
            continue;
        }

        $count++;
        echo $count . "." . $cw . "\n";

        // The stored pinyin keeps its original casing; only the dedup key is lower-cased.
        $insertData[$cw] = [
            'word' => $data['simplified'],
            'pinyin' => $data['pinyinDiacritic'],
            'english' => $data['englishExpanded']
        ];
    }

    foreach ($insertData as $rowData) {
        // The english definitions are stored as a JSON array with raw (unescaped) Unicode.
        $rowData['english'] = json_encode($rowData['english'], JSON_UNESCAPED_UNICODE);
        $db->insert('dictionary', $rowData);
    }
}
// Report wall-clock duration of the import and the script's current memory usage.
$t2 = microtime(true);
// microtime(true) yields seconds; convert to milliseconds so the value
// actually matches the "ms" unit printed below.
$time = round(($t2 - $t1) * 1000, 3);
// memory_get_usage() is in bytes; convert to mebibytes.
$mem = memory_get_usage() / (1.0 * 1024 * 1024);
echo "time:{$time}ms memory:{$mem}mb";