SchoolPhysicalExamination/application/KitchenScale3/controller/app/Language2.php

932 lines
36 KiB
PHP
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

<?php
namespace app\KitchenScale3\controller\app;
use LanguageDetection\Language;
class Language2 extends Base
{
// Supported languages, keyed by our internal language code.
// Each value is [ISO-style two-letter code, display name (written in Chinese)].
private $languageMap = [
'zh' => ['zh', '中文'],
'en' => ['en', '英语'],
'jp' => ['ja', '日语'],
'fra' => ['fr', '法语'],
'de' => ['de', '德语'],
'kor' => ['ko', '韩语'],
'ru' => ['ru', '俄语'],
'pt' => ['pt', '葡萄牙语'],
'spa' => ['es', '西班牙语'],
'ara' => ['ar', '阿拉伯语']
];
// Reverse lookup: ISO language code => our internal code.
// Used by isoToYourCode() to translate the keys reported by the detection library.
// NOTE(review): only bare two-letter codes are listed; confirm whether the
// detector can report tags with region/script suffixes, which would not match here.
private $isoToOurCode = [
'zh' => 'zh',
'en' => 'en',
'ja' => 'jp',
'fr' => 'fra',
'de' => 'de',
'ko' => 'kor',
'ru' => 'ru',
'pt' => 'pt',
'es' => 'spa',
'ar' => 'ara'
];
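// Fast lookup table of food words: lowercase word => internal language code.
// NOTE: a PHP array literal keeps only the LAST value for a repeated key, so a
// word listed under several languages below (e.g. 'tomate', 'café', 'chocolate')
// resolves to the language of its final occurrence, not its first.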
private $foodDictionary = [
// ========== 西班牙语食材 (300+词汇) ==========
'maíz' => 'spa', 'maíz' => 'spa', 'ají' => 'spa', 'jalapeño' => 'spa',
'guacamole' => 'spa', 'taco' => 'spa', 'burrito' => 'spa',
'paella' => 'spa', 'tortilla' => 'spa', 'quesadilla' => 'spa',
'chimichurri' => 'spa', 'chipotle' => 'spa', 'poblano' => 'spa',
'serrano' => 'spa', 'habanero' => 'spa', 'tomatillo' => 'spa',
'cilantro' => 'spa', 'café' => 'spa', 'té' => 'spa',
'azúcar' => 'spa', 'sal' => 'spa', 'pimienta' => 'spa',
'canela' => 'spa', 'vainilla' => 'spa', 'limón' => 'spa',
'naranja' => 'spa', 'manzana' => 'spa', 'pera' => 'spa',
'uva' => 'spa', 'fresa' => 'spa', 'plátano' => 'spa',
'aguacate' => 'spa', 'tomate' => 'spa', 'cebolla' => 'spa',
'ajo' => 'spa', 'pimiento' => 'spa', 'zanahoria' => 'spa',
'papa' => 'spa', 'batata' => 'spa', 'calabaza' => 'spa',
'berenjena' => 'spa', 'espinaca' => 'spa', 'lechuga' => 'spa',
'arroz' => 'spa', 'frijol' => 'spa', 'lenteja' => 'spa',
'garbanzo' => 'spa', 'trigo' => 'spa', 'huevo' => 'spa',
'huevos' => 'spa', 'leche' => 'spa', 'queso' => 'spa',
'mantequilla' => 'spa', 'jamón' => 'spa', 'chorizo' => 'spa',
'pollo' => 'spa', 'ternera' => 'spa', 'cerdo' => 'spa',
'cordero' => 'spa', 'pato' => 'spa', 'pavo' => 'spa',
'pescado' => 'spa', 'atún' => 'spa', 'salmón' => 'spa',
'bacalao' => 'spa', 'camarón' => 'spa', 'calamar' => 'spa',
'pan' => 'spa', 'harina' => 'spa', 'aceite' => 'spa',
'vinagre' => 'spa', 'miel' => 'spa', 'mermelada' => 'spa',
'yogur' => 'spa', 'nata' => 'spa', 'crema' => 'spa',
'helado' => 'spa', 'chocolate' => 'spa', 'galleta' => 'spa',
'pastel' => 'spa', 'bizcocho' => 'spa', 'vino' => 'spa',
'cerveza' => 'spa', 'agua' => 'spa', 'zumo' => 'spa',
'jugo' => 'spa', 'sopa' => 'spa', 'ensalada' => 'spa',
// ========== 法语食材 (300+词汇) ==========
'oeuf' => 'fra', 'oeufs' => 'fra', 'œuf' => 'fra', 'œufs' => 'fra',
'fromage' => 'fra', 'beurre' => 'fra', 'lait' => 'fra',
'jambon' => 'fra', 'saucisson' => 'fra', 'baguette' => 'fra',
'croissant' => 'fra', 'crème' => 'fra', 'fraîche' => 'fra',
'soufflé' => 'fra', 'sauté' => 'fra', 'purée' => 'fra',
'mousse' => 'fra', 'aubergine' => 'fra', 'épinard' => 'fra',
'laitue' => 'fra', 'chou' => 'fra', 'carotte' => 'fra',
'tomate' => 'fra', 'oignon' => 'fra', 'ail' => 'fra',
'poivron' => 'fra', 'pomme' => 'fra', 'poire' => 'fra',
'raisin' => 'fra', 'fraise' => 'fra', 'banane' => 'fra',
'orange' => 'fra', 'citron' => 'fra', 'sucre' => 'fra',
'sel' => 'fra', 'poivre' => 'fra', 'huile' => 'fra',
'vinaigre' => 'fra', 'farine' => 'fra', 'riz' => 'fra',
'pâtes' => 'fra', 'poulet' => 'fra', 'boeuf' => 'fra',
'porc' => 'fra', 'agneau' => 'fra', 'canard' => 'fra',
'dinde' => 'fra', 'viande' => 'fra', 'poisson' => 'fra',
'saumon' => 'fra', 'thon' => 'fra', 'crevette' => 'fra',
'pain' => 'fra', 'moutarde' => 'fra', 'mayonnaise' => 'fra',
'ketchup' => 'fra', 'miel' => 'fra', 'confiture' => 'fra',
'yaourt' => 'fra', 'glace' => 'fra', 'chocolat' => 'fra',
'biscuit' => 'fra', 'gâteau' => 'fra', 'vin' => 'fra',
'bière' => 'fra', 'eau' => 'fra', 'jus' => 'fra',
'soupe' => 'fra', 'salade' => 'fra', 'café' => 'fra',
'thé' => 'fra', 'légume' => 'fra', 'légumes' => 'fra',
// ========== 葡萄牙语食材 (300+词汇) ==========
'ovo' => 'pt', 'ovos' => 'pt', 'queijo' => 'pt',
'manteiga' => 'pt', 'leite' => 'pt', 'presunto' => 'pt',
'chouriço' => 'pt', 'pão' => 'pt', 'bacalhau' => 'pt',
'arroz' => 'pt', 'feijão' => 'pt', 'batata' => 'pt',
'cenoura' => 'pt', 'tomate' => 'pt', 'cebola' => 'pt',
'alho' => 'pt', 'pimentão' => 'pt', 'maçã' => 'pt',
'pêra' => 'pt', 'uva' => 'pt', 'morango' => 'pt',
'banana' => 'pt', 'laranja' => 'pt', 'limão' => 'pt',
'açúcar' => 'pt', 'sal' => 'pt', 'pimenta' => 'pt',
'óleo' => 'pt', 'vinagre' => 'pt', 'farinha' => 'pt',
'massa' => 'pt', 'creme' => 'pt', 'iogurte' => 'pt',
'frango' => 'pt', 'vaca' => 'pt', 'porco' => 'pt',
'cordeiro' => 'pt', 'pato' => 'pt', 'peru' => 'pt',
'carne' => 'pt', 'peixe' => 'pt', 'salmão' => 'pt',
'atum' => 'pt', 'camarão' => 'pt', 'lula' => 'pt',
'legume' => 'pt', 'legumes' => 'pt', 'abacate' => 'pt',
'abóbora' => 'pt', 'berinjela' => 'pt', 'espinafre' => 'pt',
'alface' => 'pt', 'repolho' => 'pt', 'brócolis' => 'pt',
'couve' => 'pt', 'ervilha' => 'pt', 'milho' => 'pt',
'trigo' => 'pt', 'aveia' => 'pt', 'amendoim' => 'pt',
'noz' => 'pt', 'amêndoa' => 'pt', 'castanha' => 'pt',
'chocolate' => 'pt', 'bolacha' => 'pt', 'bolo' => 'pt',
'sorvete' => 'pt', 'mel' => 'pt', 'geleia' => 'pt',
'mostarda' => 'pt', 'maionese' => 'pt', 'ketchup' => 'pt',
'vinho' => 'pt', 'cerveja' => 'pt', 'água' => 'pt',
'suco' => 'pt', 'sopa' => 'pt', 'salada' => 'pt',
'café' => 'pt', 'chá' => 'pt',
// ========== 德语食材 (200+词汇) ==========
'ei' => 'de', 'eier' => 'de', 'käse' => 'de',
'butter' => 'de', 'milch' => 'de', 'schinken' => 'de',
'wurst' => 'de', 'brot' => 'de', 'brötchen' => 'de',
'käse' => 'de', 'quark' => 'de', 'joghurt' => 'de',
'sahne' => 'de', 'eier' => 'de', 'huhn' => 'de',
'rind' => 'de', 'schwein' => 'de', 'lamm' => 'de',
'ente' => 'de', 'pute' => 'de', 'fleisch' => 'de',
'fisch' => 'de', 'lachs' => 'de', 'thunfisch' => 'de',
'garnele' => 'de', 'tintenfisch' => 'de', 'gemüse' => 'de',
'karotte' => 'de', 'tomate' => 'de', 'zwiebel' => 'de',
'knoblauch' => 'de', 'paprika' => 'de', 'apfel' => 'de',
'birne' => 'de', 'traube' => 'de', 'erdbeere' => 'de',
'banane' => 'de', 'orange' => 'de', 'zitrone' => 'de',
'zucker' => 'de', 'salz' => 'de', 'pfeffer' => 'de',
'öl' => 'de', 'essig' => 'de', 'mehl' => 'de',
'reis' => 'de', 'nudeln' => 'de', 'kartoffel' => 'de',
'gurke' => 'de', 'spinat' => 'de', 'salat' => 'de',
'kohl' => 'de', 'brokkoli' => 'de', 'blumenkohl' => 'de',
'erbsen' => 'de', 'mais' => 'de', 'weizen' => 'de',
'hafer' => 'de', 'nuss' => 'de', 'mandel' => 'de',
'schokolade' => 'de', 'kekse' => 'de', 'kuchen' => 'de',
'eis' => 'de', 'honig' => 'de', 'marmelade' => 'de',
'senf' => 'de', 'majonäse' => 'de', 'wein' => 'de',
'bier' => 'de', 'wasser' => 'de', 'saft' => 'de',
'suppe' => 'de', 'kaffee' => 'de', 'tee' => 'de',
// ========== 英语食材 (200+词汇) ==========
'egg' => 'en', 'eggs' => 'en', 'cheese' => 'en',
'butter' => 'en', 'milk' => 'en', 'ham' => 'en',
'sausage' => 'en', 'bread' => 'en', 'baguette' => 'en',
'croissant' => 'en', 'cream' => 'en', 'yogurt' => 'en',
'chicken' => 'en', 'beef' => 'en', 'pork' => 'en',
'lamb' => 'en', 'duck' => 'en', 'turkey' => 'en',
'meat' => 'en', 'fish' => 'en', 'salmon' => 'en',
'tuna' => 'en', 'shrimp' => 'en', 'squid' => 'en',
'vegetable' => 'en', 'carrot' => 'en', 'tomato' => 'en',
'onion' => 'en', 'garlic' => 'en', 'pepper' => 'en',
'apple' => 'en', 'pear' => 'en', 'grape' => 'en',
'strawberry' => 'en', 'banana' => 'en', 'orange' => 'en',
'lemon' => 'en', 'sugar' => 'en', 'salt' => 'en',
'pepper' => 'en', 'oil' => 'en', 'vinegar' => 'en',
'flour' => 'en', 'rice' => 'en', 'pasta' => 'en',
'potato' => 'en', 'cucumber' => 'en', 'spinach' => 'en',
'lettuce' => 'en', 'cabbage' => 'en', 'broccoli' => 'en',
'cauliflower' => 'en', 'pea' => 'en', 'corn' => 'en',
'wheat' => 'en', 'oat' => 'en', 'nut' => 'en',
'almond' => 'en', 'chocolate' => 'en', 'cookie' => 'en',
'cake' => 'en', 'icecream' => 'en', 'honey' => 'en',
'jam' => 'en', 'mustard' => 'en', 'mayonnaise' => 'en',
'ketchup' => 'en', 'wine' => 'en', 'beer' => 'en',
'water' => 'en', 'juice' => 'en', 'soup' => 'en',
'salad' => 'en', 'coffee' => 'en', 'tea' => 'en',
// ========== 中文食材 (200+词汇) ==========
'鸡蛋' => 'zh', '蛋' => 'zh', '奶酪' => 'zh', '芝士' => 'zh',
'黄油' => 'zh', '牛奶' => 'zh', '奶' => 'zh', '火腿' => 'zh',
'香肠' => 'zh', '面包' => 'zh', '法棍' => 'zh', '牛角包' => 'zh',
'奶油' => 'zh', '酸奶' => 'zh', '鸡肉' => 'zh', '牛肉' => 'zh',
'猪肉' => 'zh', '羊肉' => 'zh', '鸭肉' => 'zh', '火鸡' => 'zh',
'肉' => 'zh', '鱼' => 'zh', '三文鱼' => 'zh', '金枪鱼' => 'zh',
'虾' => 'zh', '鱿鱼' => 'zh', '蔬菜' => 'zh', '胡萝卜' => 'zh',
'番茄' => 'zh', '西红柿' => 'zh', '洋葱' => 'zh', '大蒜' => 'zh',
'辣椒' => 'zh', '苹果' => 'zh', '梨' => 'zh', '葡萄' => 'zh',
'草莓' => 'zh', '香蕉' => 'zh', '橙子' => 'zh', '柠檬' => 'zh',
'糖' => 'zh', '盐' => 'zh', '胡椒' => 'zh', '油' => 'zh',
'醋' => 'zh', '面粉' => 'zh', '米' => 'zh', '米饭' => 'zh',
'面条' => 'zh', '土豆' => 'zh', '黄瓜' => 'zh', '菠菜' => 'zh',
'生菜' => 'zh', '白菜' => 'zh', '西兰花' => 'zh', '花菜' => 'zh',
'豌豆' => 'zh', '玉米' => 'zh', '小麦' => 'zh', '燕麦' => 'zh',
'坚果' => 'zh', '杏仁' => 'zh', '巧克力' => 'zh', '饼干' => 'zh',
'蛋糕' => 'zh', '冰淇淋' => 'zh', '蜂蜜' => 'zh', '果酱' => 'zh',
'芥末' => 'zh', '蛋黄酱' => 'zh', '番茄酱' => 'zh', '红酒' => 'zh',
'啤酒' => 'zh', '水' => 'zh', '果汁' => 'zh', '汤' => 'zh',
'沙拉' => 'zh', '咖啡' => 'zh', '茶' => 'zh', '米饭' => 'zh',
// ========== 日语食材 (200+词汇) ==========
'卵' => 'jp', 'たまご' => 'jp', '玉子' => 'jp',
'チーズ' => 'jp', 'バター' => 'jp', '牛乳' => 'jp',
'ハム' => 'jp', 'ソーセージ' => 'jp', 'パン' => 'jp',
'フランスパン' => 'jp', 'クロワッサン' => 'jp', 'クリーム' => 'jp',
'ヨーグルト' => 'jp', '鶏肉' => 'jp', '牛肉' => 'jp',
'豚肉' => 'jp', '羊肉' => 'jp', '鴨肉' => 'jp',
'七面鳥' => 'jp', '肉' => 'jp', '魚' => 'jp',
'サーモン' => 'jp', 'マグロ' => 'jp', 'エビ' => 'jp',
'イカ' => 'jp', '野菜' => 'jp', '人参' => 'jp',
'トマト' => 'jp', '玉ねぎ' => 'jp', 'にんにく' => 'jp',
'ピーマン' => 'jp', 'りんご' => 'jp', '梨' => 'jp',
'ぶどう' => 'jp', 'いちご' => 'jp', 'バナナ' => 'jp',
'オレンジ' => 'jp', 'レモン' => 'jp', '砂糖' => 'jp',
'塩' => 'jp', '胡椒' => 'jp', '油' => 'jp',
'酢' => 'jp', '小麦粉' => 'jp', '米' => 'jp',
'パスタ' => 'jp', 'じゃがいも' => 'jp', 'きゅうり' => 'jp',
'ほうれん草' => 'jp', 'レタス' => 'jp', 'キャベツ' => 'jp',
'ブロッコリー' => 'jp', 'カリフラワー' => 'jp', '豌豆' => 'jp',
'とうもろこし' => 'jp', '小麦' => 'jp', 'オーツ麦' => 'jp',
'ナッツ' => 'jp', 'アーモンド' => 'jp', 'チョコレート' => 'jp',
'クッキー' => 'jp', 'ケーキ' => 'jp', 'アイスクリーム' => 'jp',
'はちみつ' => 'jp', 'ジャム' => 'jp', 'からし' => 'jp',
'マヨネーズ' => 'jp', 'ケチャップ' => 'jp', 'ワイン' => 'jp',
'ビール' => 'jp', '水' => 'jp', 'ジュース' => 'jp',
'スープ' => 'jp', 'サラダ' => 'jp', 'コーヒー' => 'jp',
'お茶' => 'jp',
// ========== 韩语食材 (200+词汇) ==========
'계란' => 'kor', '달걀' => 'kor', '치즈' => 'kor',
'버터' => 'kor', '우유' => 'kor', '햄' => 'kor',
'소시지' => 'kor', '빵' => 'kor', '바게트' => 'kor',
'크루아상' => 'kor', '크림' => 'kor', '요구르트' => 'kor',
'닭고기' => 'kor', '소고기' => 'kor', '돼지고기' => 'kor',
'양고기' => 'kor', '오리고기' => 'kor', '칠면조' => 'kor',
'고기' => 'kor', '생선' => 'kor', '연어' => 'kor',
'참치' => 'kor', '새우' => 'kor', '오징어' => 'kor',
'야채' => 'kor', '당근' => 'kor', '토마토' => 'kor',
'양파' => 'kor', '마늘' => 'kor', '피망' => 'kor',
'사과' => 'kor', '배' => 'kor', '포도' => 'kor',
'딸기' => 'kor', '바나나' => 'kor', '오렌지' => 'kor',
'레몬' => 'kor', '설탕' => 'kor', '소금' => 'kor',
'후추' => 'kor', '기름' => 'kor', '식초' => 'kor',
'밀가루' => 'kor', '쌀' => 'kor', '파스타' => 'kor',
'감자' => 'kor', '오이' => 'kor', '시금치' => 'kor',
'상추' => 'kor', '양배추' => 'kor', '브로콜리' => 'kor',
'컬리플라워' => 'kor', '완두콩' => 'kor', '옥수수' => 'kor',
'밀' => 'kor', '오트밀' => 'kor', '견과류' => 'kor',
'아몬드' => 'kor', '초콜릿' => 'kor', '쿠키' => 'kor',
'케이크' => 'kor', '아이스크림' => 'kor', '꿀' => 'kor',
'잼' => 'kor', '겨자' => 'kor', '마요네즈' => 'kor',
'케첩' => 'kor', '와인' => 'kor', '맥주' => 'kor',
'물' => 'kor', '주스' => 'kor', '수프' => 'kor',
'샐러드' => 'kor', '커피' => 'kor', '차' => 'kor',
// ========== 俄语食材 (150+词汇) ==========
'яйцо' => 'ru', 'яйца' => 'ru', 'сыр' => 'ru',
'масло' => 'ru', 'молоко' => 'ru', 'ветчина' => 'ru',
'колбаса' => 'ru', 'хлеб' => 'ru', 'багет' => 'ru',
'круассан' => 'ru', 'сливки' => 'ru', 'йогурт' => 'ru',
'курица' => 'ru', 'говядина' => 'ru', 'свинина' => 'ru',
'баранина' => 'ru', 'утка' => 'ru', 'индейка' => 'ru',
'мясо' => 'ru', 'рыба' => 'ru', 'лосось' => 'ru',
'тунец' => 'ru', 'креветка' => 'ru', 'кальмар' => 'ru',
'овощ' => 'ru', 'овощи' => 'ru', 'морковь' => 'ru',
'помидор' => 'ru', 'лук' => 'ru', 'чеснок' => 'ru',
'перец' => 'ru', 'яблоко' => 'ru', 'груша' => 'ru',
'виноград' => 'ru', 'клубника' => 'ru', 'банан' => 'ru',
'апельсин' => 'ru', 'лимон' => 'ru', 'сахар' => 'ru',
'соль' => 'ru', 'перец' => 'ru', 'масло' => 'ru',
'уксус' => 'ru', 'мука' => 'ru', 'рис' => 'ru',
'паста' => 'ru', 'картофель' => 'ru', 'огурец' => 'ru',
'шпинат' => 'ru', 'салат' => 'ru', 'капуста' => 'ru',
'брокколи' => 'ru', 'цветная капуста' => 'ru', 'горох' => 'ru',
'кукуруза' => 'ru', 'пшеница' => 'ru', 'овёс' => 'ru',
'орех' => 'ru', 'миндаль' => 'ru', 'шоколад' => 'ru',
'печенье' => 'ru', 'торт' => 'ru', 'мороженое' => 'ru',
'мёд' => 'ru', 'варенье' => 'ru', 'горчица' => 'ru',
'майонез' => 'ru', 'кетчуп' => 'ru', 'вино' => 'ru',
'пиво' => 'ru', 'вода' => 'ru', 'сок' => 'ru',
'суп' => 'ru', 'салат' => 'ru', 'кофе' => 'ru',
'чай' => 'ru',
// ========== 阿拉伯语食材 (100+词汇) ==========
'بيض' => 'ara', 'جبن' => 'ara', 'حليب' => 'ara',
'زبدة' => 'ara', 'لحم' => 'ara', 'خبز' => 'ara',
'دجاج' => 'ara', 'لحم بقر' => 'ara', 'لحم خنزير' => 'ara',
'سمك' => 'ara', 'خضار' => 'ara', 'جزر' => 'ara',
'طماطم' => 'ara', 'بصل' => 'ara', 'ثوم' => 'ara',
'فلفل' => 'ara', 'تفاح' => 'ara', 'كمثرى' => 'ara',
'عنب' => 'ara', 'فراولة' => 'ara', 'موز' => 'ara',
'برتقال' => 'ara', 'ليمون' => 'ara', 'سكر' => 'ara',
'ملح' => 'ara', 'زيت' => 'ara', 'خل' => 'ara',
'دقيق' => 'ara', 'أرز' => 'ara', 'بطاطس' => 'ara',
'قهوة' => 'ara', 'شاي' => 'ara', 'ماء' => 'ara',
];
// Detector instance; stays null until detectWithPatrickschur() creates it on first use.
private $detector = null;
/**
 * Runs the parent (Base) constructor; this class adds no initialisation of its own.
 */
public function __construct()
{
parent::__construct();
}
/**
 * Detect the language of a short piece of text (typically a food name).
 *
 * Detection runs through progressively more expensive stages and stops at
 * the first one that produces a result:
 *   1. exact lookup of the lowercased text in the food dictionary;
 *   2. characters that identify a single language (quickAccentDetection);
 *   3. script detection when the input is a single character;
 *   4. the patrickschur/language-detection library;
 *   5. accent-weighted scoring when the library confidence is below 0.5;
 *   6. the library result when its confidence is at least 0.3;
 *   7. rule-based fallbacks (enhancedDetection).
 * Every result goes through additional_condition_filter(), which lets the
 * caller's own language settle ambiguous groups (zh/jp, en/fra/de/pt/spa)
 * and serves as the fallback when nothing could be detected.
 *
 * @param mixed       $text     Text to classify. Non-string, empty or blank input yields 'zh'.
 * @param string|null $language Caller's current language code. Optional so that
 *                              single-argument callers (batchCheck(), test()) keep
 *                              working instead of failing on a missing argument.
 * @return string Internal language code (e.g. 'zh', 'en', 'spa'); when nothing is
 *                detected this is $language if it is truthy, otherwise 'zh'.
 */
public function check($text, $language = null)
{
    // Missing, non-string or blank input: fall back to Chinese.
    if (empty($text) || !is_string($text)) {
        return 'zh';
    }
    $text = trim($text);
    if ($text === '') {
        return 'zh';
    }

    $lowerText = mb_strtolower($text, 'UTF-8');

    // 1. Dictionary lookup: cheapest path, tried first.
    if (isset($this->foodDictionary[$lowerText])) {
        return $this->additional_condition_filter($this->foodDictionary[$lowerText], $language);
    }

    // 2. Characters that belong to exactly one supported language.
    $accentResult = $this->quickAccentDetection($text);
    if ($accentResult !== false) {
        return $this->additional_condition_filter($accentResult, $language);
    }

    // 3. A single character can only be classified by its script.
    if (mb_strlen($text, 'UTF-8') === 1) {
        $singleCharResult = $this->detectSingleChar($text);
        if ($singleCharResult !== false) {
            return $this->additional_condition_filter($singleCharResult, $language);
        }
    }

    // 4. Statistical detection through the library.
    $result = $this->detectWithPatrickschur($text);

    // 5. Accented Latin text with an unsure library result: re-score by accents.
    //    The probe runs on the lowercased text so upper-case accents count too.
    if ($result['confidence'] < 0.5
        && preg_match('/[áéíóúñüéèêëàâæçîïôœùûÿáàâãéêíóôõúüçäöüß]/u', $lowerText)
    ) {
        $strongResult = $this->strongAccentDetection($text, $result['all_results']);
        if ($strongResult !== false) {
            return $this->additional_condition_filter($strongResult, $language);
        }
    }

    // 6. Accept the library result once it is reasonably confident.
    if ($result['lang'] !== false && $result['confidence'] >= 0.3) {
        return $this->additional_condition_filter($result['lang'], $language);
    }

    // 7. Rule-based fallbacks.
    $enhancedResult = $this->enhancedDetection($text, $result);
    if ($enhancedResult !== false) {
        return $this->additional_condition_filter($enhancedResult, $language);
    }

    // Nothing conclusive: hand over whatever the library produced (possibly
    // false) and let the filter choose the fallback.
    return $this->additional_condition_filter($result['lang'], $language);
}
// /**
// * 主检测方法 - 优化版
// */
// public function check($text)
// {
// if (empty($text) || !is_string($text)) {
// return false;
// }
// $text = trim($text);
// // 如果是空字符串
// if ($text === '') {
// return false;
// }
// $lowerText = mb_strtolower($text, 'UTF-8');
// // 1. 先查快速食材词典(最快最准)
// if (isset($this->foodDictionary[$lowerText])) {
// return $this->foodDictionary[$lowerText];
// }
// // 2. 重音字符快速检测(在库检测之前)
// $accentResult = $this->quickAccentDetection($text);
// if ($accentResult !== false) {
// return $accentResult;
// }
// // 3. 单个字符特殊处理
// if (mb_strlen($text) === 1) {
// $singleCharResult = $this->detectSingleChar($text);
// if ($singleCharResult !== false) {
// return $singleCharResult;
// }
// }
// // 4. 使用 patrickschur/language-detection
// $result = $this->detectWithPatrickschur($text);
// // 5. 如果是拉丁字母且有重音字符,进行强化检测
// if (preg_match('/[áéíóúñüéèêëàâæçîïôœùûÿáàâãéêíóôõúüçäöüß]/u', $text)) {
// if ($result['confidence'] < 0.5) {
// $strongResult = $this->strongAccentDetection($text, $result['all_results']);
// if ($strongResult !== false) {
// return $strongResult;
// }
// }
// }
// if ($result['lang'] !== false && $result['confidence'] >= 0.3) {
// return $result['lang'];
// }
// // 6. 增强检测
// $enhancedResult = $this->enhancedDetection($text, $result);
// if ($enhancedResult !== false) {
// return $enhancedResult;
// }
// // 7. 返回检测结果或默认英语
// return $result['lang'] !== false ? $result['lang'] : 'en';
// }
/**
 * Fast path: recognise a language from characters that, among the supported
 * languages, are treated as belonging to exactly one of them.
 *
 * The text is case-folded first so that upper-case input such as "PIÑA" or
 * "PÃO" is recognised as well. Groups are tried in a fixed order (Spanish,
 * Portuguese, French, German), so the first matching group wins when the
 * text contains markers of several languages.
 *
 * @param string $text Text to inspect.
 * @return string|false Internal language code, or false when no marker is present.
 */
private function quickAccentDetection($text)
{
    $haystack = mb_strtolower($text, 'UTF-8');

    // Order matters: it defines the priority between languages.
    $markers = [
        'spa' => ['ñ', '¡', '¿'],
        'pt'  => ['ã', 'õ'],
        'fra' => ['œ', 'æ'],
        'de'  => ['ß'],
    ];

    foreach ($markers as $lang => $chars) {
        foreach ($chars as $char) {
            // Byte-wise search is sufficient for finding a UTF-8 substring.
            if (strpos($haystack, $char) !== false) {
                return $lang;
            }
        }
    }

    return false;
}
/**
 * Score accented Latin text against Spanish, French, Portuguese and German.
 *
 * Each accented character a language uses adds 2 points, each character
 * treated as unique to that language adds a further 5, and every library
 * score that maps to one of the four languages is added with a weight of 3.
 * A language wins only if it reaches 5 points and leads the runner-up by at
 * least 2.
 *
 * Matching is done on the lowercased text so upper-case accents count too.
 *
 * @param string $text          Text to score.
 * @param array  $initialScores Library results as [ISO code => score]; may be empty.
 * @return string|false Internal language code, or false when there is no clear winner.
 */
private function strongAccentDetection($text, $initialScores = [])
{
    $lowerText = mb_strtolower($text, 'UTF-8');

    $features = [
        'spa' => [
            'chars'  => ['á', 'é', 'í', 'ó', 'ú', 'ñ', 'ü', '¡', '¿'],
            'unique' => ['ñ', '¡', '¿'],
        ],
        'fra' => [
            'chars'  => ['é', 'è', 'ê', 'ë', 'à', 'â', 'æ', 'ç', 'î', 'ï', 'ô', 'œ', 'ù', 'û', 'ÿ'],
            'unique' => ['œ', 'æ'],
        ],
        'pt' => [
            'chars'  => ['á', 'à', 'â', 'ã', 'é', 'ê', 'í', 'ó', 'ô', 'õ', 'ú', 'ü', 'ç'],
            'unique' => ['ã', 'õ'],
        ],
        'de' => [
            'chars'  => ['ä', 'ö', 'ü', 'ß'],
            'unique' => ['ß'],
        ],
    ];

    // Character-based score per language.
    $scores = [];
    foreach ($features as $lang => $data) {
        $scores[$lang] = 0;
        foreach ($data['chars'] as $char) {
            if (mb_strpos($lowerText, $char, 0, 'UTF-8') !== false) {
                $scores[$lang] += 2;
            }
        }
        foreach ($data['unique'] as $char) {
            if (mb_strpos($lowerText, $char, 0, 'UTF-8') !== false) {
                $scores[$lang] += 5; // unique characters weigh more
            }
        }
    }

    // Blend in the library's opinion for the languages scored here.
    if (is_array($initialScores)) {
        foreach ($initialScores as $lang => $score) {
            $ourCode = $this->isoToYourCode($lang);
            if ($ourCode !== false && isset($scores[$ourCode])) {
                $scores[$ourCode] += $score * 3;
            }
        }
    }

    // Highest score first; the internal pointer then walks winner -> runner-up.
    arsort($scores);
    $topLang = key($scores);
    $topScore = current($scores);
    $secondScore = next($scores);

    if ($topScore >= 5 && $topScore - $secondScore >= 2) {
        return $topLang;
    }

    return false;
}
/**
 * Run the patrickschur/language-detection library on the text.
 *
 * The detector is created on first use and cached in $this->detector.
 * Only the three best results are kept.
 *
 * @param string $text Text to classify.
 * @return array ['lang' => string|false, 'confidence' => int|float, 'all_results' => array]
 *               'lang' is the internal code of the top result, or false when
 *               that result is not a supported language or detection failed;
 *               'all_results' maps the library's language codes to their scores.
 */
private function detectWithPatrickschur($text)
{
    $noResult = ['lang' => false, 'confidence' => 0, 'all_results' => []];

    try {
        if ($this->detector === null) {
            $this->detector = new Language();
        }

        $results = $this->detector
            ->detect($text)
            ->limit(0, 3);

        if (empty($results)) {
            return $noResult;
        }

        // Copy into a plain array. The emptiness test has to be made on this
        // copy: empty() applied to a result *object* is never true, so an
        // empty result set would otherwise slip through with a confidence of
        // false instead of 0.
        $resultArray = [];
        foreach ($results as $lang => $score) {
            $resultArray[$lang] = $score;
        }
        if (empty($resultArray)) {
            return $noResult;
        }

        arsort($resultArray);
        $topLang = key($resultArray);
        $confidence = current($resultArray);

        return [
            'lang' => $this->isoToYourCode($topLang),
            'confidence' => $confidence,
            'all_results' => $resultArray,
        ];
    } catch (\Exception $e) {
        // Detection is best-effort: log the failure and report "no result".
        \think\Log::error('语言检测失败: ' . $e->getMessage());
        return $noResult;
    }
}
/**
 * Rule-based fallback used when the library result is not trusted.
 *
 * Tries, in order: script detection for very short text (at most three
 * characters), accent/common-word scoring for text made only of Latin
 * letters, whitespace and punctuation, and finally plain script detection.
 *
 * @param string $text          Text to classify.
 * @param array  $initialResult Result of detectWithPatrickschur(); its
 *                              'all_results' entry feeds the Latin scoring.
 * @return string|false Internal language code, or false when every rule fails.
 */
private function enhancedDetection($text, $initialResult)
{
    $candidate = trim($text);
    $detected = false;

    if (mb_strlen($candidate) <= 3) {
        $detected = $this->shortTextDetection($candidate);
    }

    if ($detected === false && preg_match('/^[\p{Latin}\s\pP]+$/u', $candidate)) {
        $detected = $this->detectLatinLanguage($candidate, $initialResult['all_results']);
    }

    if ($detected === false) {
        $detected = $this->detectByAdvancedRules($candidate);
    }

    return $detected;
}
/**
 * Classify very short text by the script of its characters.
 *
 * Pure ASCII-letter input is rejected (returns false) because a few Latin
 * letters do not identify a language. Kana is tested before Han characters:
 * kana marks the text as Japanese even when kanji are present, whereas Han
 * characters alone are reported as Chinese.
 *
 * @param string $text Text to classify.
 * @return string|false Internal language code, or false when no script matches.
 */
private function shortTextDetection($text)
{
    $text = trim($text);

    if (preg_match('/^[a-zA-Z]+$/u', $text)) {
        return false;
    }

    // Hiragana / Katakana: must precede the Han test (see docblock).
    if (preg_match('/[\x{3040}-\x{309f}\x{30a0}-\x{30ff}]/u', $text)) {
        return 'jp';
    }
    // Han ideographs.
    if (preg_match('/[\x{4e00}-\x{9fa5}]/u', $text)) {
        return 'zh';
    }
    // Hangul syllables.
    if (preg_match('/[\x{ac00}-\x{d7af}]/u', $text)) {
        return 'kor';
    }
    // Arabic.
    if (preg_match('/[\x{0600}-\x{06ff}]/u', $text)) {
        return 'ara';
    }
    // Cyrillic.
    if (preg_match('/[\x{0400}-\x{04ff}]/u', $text)) {
        return 'ru';
    }

    return false;
}
/**
 * Score Latin-script text against French, Spanish, Portuguese, German and English.
 *
 * Points come from three sources: accented characters used by a language
 * (plus extra points for characters treated as unique to it), the library
 * scores (weight 2), and an exact match of the whole text with a common
 * function word (5 points). The best language is returned once it reaches
 * 3 points.
 *
 * All character tests run on the lowercased text so that upper-case
 * accented input scores the same as lower-case input.
 *
 * @param string $text          Text to score.
 * @param array  $initialScores Library results as [ISO code => score]; may be empty.
 * @return string|false Internal language code, or false when no language reaches 3 points.
 */
private function detectLatinLanguage($text, $initialScores = [])
{
    $lowerText = mb_strtolower($text, 'UTF-8');

    $scores = [
        'fra' => 0,
        'spa' => 0,
        'pt' => 0,
        'de' => 0,
        'en' => 0,
    ];

    // [language, pattern, points]; a language may appear more than once.
    $rules = [
        // accented characters a language uses
        ['fra', '/[éèêëàâæçîïôœùûÿ]/u', 3],
        ['spa', '/[áéíóúñü¡¿]/u', 2],
        ['pt', '/[áàâãéêíóôõúüç]/u', 2],
        ['de', '/[äöüß]/u', 3],
        // characters treated as unique to one language
        ['spa', '/ñ/u', 3],
        ['pt', '/[ãõ]/u', 3],
        ['fra', '/[œæ]/u', 3],
        ['de', '/ß/u', 3],
        ['spa', '/[¡¿]/u', 3],
    ];
    foreach ($rules as $rule) {
        if (preg_match($rule[1], $lowerText)) {
            $scores[$rule[0]] += $rule[2];
        }
    }

    // Blend in the library's opinion for the languages scored here.
    if (is_array($initialScores)) {
        foreach ($initialScores as $lang => $score) {
            $ourCode = $this->isoToYourCode($lang);
            if ($ourCode !== false && isset($scores[$ourCode])) {
                $scores[$ourCode] += $score * 2;
            }
        }
    }

    // The whole text being a common function word is a strong hint.
    $commonWords = [
        'fra' => ['le', 'la', 'les', 'un', 'une', 'des', 'du', 'de', 'à'],
        'spa' => ['el', 'la', 'los', 'las', 'un', 'una', 'de', 'y', 'a'],
        'pt' => ['o', 'a', 'os', 'as', 'um', 'uma', 'de', 'e'],
        'de' => ['der', 'die', 'das', 'ein', 'und'],
    ];
    foreach ($commonWords as $lang => $words) {
        if (in_array($lowerText, $words, true)) {
            $scores[$lang] += 5;
        }
    }

    arsort($scores);
    $topLang = key($scores);
    $topScore = current($scores);

    return $topScore >= 3 ? $topLang : false;
}
/**
 * Last-resort detection based purely on the script of the characters.
 *
 * Kana is tested before Han characters: kana marks the text as Japanese even
 * when kanji are present (e.g. "食べ物"), whereas Han characters alone are
 * reported as Chinese.
 *
 * @param string $text Text to classify.
 * @return string|false Internal language code, or false when no script matches.
 */
private function detectByAdvancedRules($text)
{
    $cleanText = trim($text);

    // Hiragana / Katakana: must precede the Han test (see docblock).
    if (preg_match('/[\x{3040}-\x{309f}\x{30a0}-\x{30ff}]/u', $cleanText)) {
        return 'jp';
    }
    // Han ideographs.
    if (preg_match('/[\x{4e00}-\x{9fa5}]/u', $cleanText)) {
        return 'zh';
    }
    // Hangul syllables.
    if (preg_match('/[\x{ac00}-\x{d7af}]/u', $cleanText)) {
        return 'kor';
    }
    // Arabic.
    if (preg_match('/[\x{0600}-\x{06ff}]/u', $cleanText)) {
        return 'ara';
    }
    // Cyrillic.
    if (preg_match('/[\x{0400}-\x{04ff}]/u', $cleanText)) {
        return 'ru';
    }

    return false;
}
/**
 * Map one character to a language by the Unicode block it falls in.
 *
 * @param string $char The character to classify.
 * @return string|false Internal language code, or false when the character
 *                      belongs to none of the recognised blocks.
 */
private function detectSingleChar($char)
{
    // Checked top to bottom; the first matching block decides.
    $scriptPatterns = [
        'zh'  => '/[\x{4e00}-\x{9fa5}]/u',
        'jp'  => '/[\x{3040}-\x{309f}\x{30a0}-\x{30ff}]/u',
        'kor' => '/[\x{ac00}-\x{d7af}]/u',
        'ara' => '/[\x{0600}-\x{06ff}]/u',
        'ru'  => '/[\x{0400}-\x{04ff}]/u',
    ];

    foreach ($scriptPatterns as $code => $pattern) {
        if (preg_match($pattern, $char)) {
            return $code;
        }
    }

    return false;
}
/**
 * Translate a language code reported by the detection library into our
 * internal code.
 *
 * An exact match in $isoToOurCode is preferred. Otherwise the primary
 * language subtag is tried, so tags that carry a region or script suffix
 * (e.g. 'pt-BR', 'zh-Hans') still resolve to 'pt' / 'zh'.
 *
 * @param mixed $isoCode Language code or tag; non-string input is not supported.
 * @return string|false Internal language code, or false when unsupported.
 */
private function isoToYourCode($isoCode)
{
    if (!is_string($isoCode) || $isoCode === '') {
        return false;
    }

    if (isset($this->isoToOurCode[$isoCode])) {
        return $this->isoToOurCode[$isoCode];
    }

    // Fall back to the part before the first '-' (or '_'), lowercased.
    $parts = explode('-', str_replace('_', '-', $isoCode), 2);
    $primary = strtolower($parts[0]);

    return isset($this->isoToOurCode[$primary]) ? $this->isoToOurCode[$primary] : false;
}
/**
 * Detect the language of several texts at once.
 *
 * @param mixed       $texts    Array of texts; anything else yields an empty array.
 * @param string|null $language Caller's current language code, forwarded to
 *                              check() for every text (optional).
 * @return array List of detected language codes, in input order (keys are not preserved).
 */
public function batchCheck($texts, $language = null)
{
    if (!is_array($texts)) {
        return [];
    }

    $results = [];
    foreach ($texts as $text) {
        // check() takes two arguments; always pass the language explicitly.
        $results[] = $this->check($text, $language);
    }

    return $results;
}
/**
 * Return the table of supported languages.
 *
 * @return array Internal language code => [ISO-style code, display name].
 */
public function getSupportedLanguages()
{
    $supported = $this->languageMap;

    return $supported;
}
/**
 * Register a custom word in the food dictionary of this instance.
 *
 * The word is trimmed and lowercased before it is stored, which is the same
 * normalisation check() applies before looking a text up; without the trim a
 * word passed with surrounding whitespace could never be matched.
 *
 * @param mixed $word         Word to add; must be a scalar that is non-blank after trimming.
 * @param mixed $languageCode Internal language code; must be a key of the supported languages.
 * @return bool True when the word was stored, false when the word or the code was rejected.
 */
public function addToDictionary($word, $languageCode)
{
    if (!is_scalar($word) || !is_string($languageCode)) {
        return false;
    }
    if (!isset($this->languageMap[$languageCode])) {
        return false;
    }

    $key = mb_strtolower(trim((string) $word), 'UTF-8');
    if ($key === '') {
        // check() returns early on blank text, so an empty key would be unreachable.
        return false;
    }

    $this->foodDictionary[$key] = $languageCode;

    return true;
}
/**
 * Reconcile a detection result with the caller's current language.
 *
 * Languages are grouped into families that are easily confused with each
 * other: {zh, jp} and {en, fra, de, pt, spa}. When the detected language and
 * the caller's language fall into the same family, the caller's language
 * wins; otherwise the detected language is kept. When nothing was detected
 * (false), the caller's language is used, or 'zh' if that is empty.
 *
 * Comparisons are deliberately loose (==), as in the original implementation.
 *
 * @param mixed $judgment Detected internal language code, or false.
 * @param mixed $language Caller's current language code (may be empty).
 * @return mixed Resulting language code.
 */
public function additional_condition_filter($judgment, $language)
{
    // No detection at all: trust the caller, defaulting to Chinese.
    if ($judgment === false) {
        return $language ? $language : 'zh';
    }

    $families = [
        ['zh', 'jp'],
        ['en', 'fra', 'de', 'pt', 'spa'],
    ];

    foreach ($families as $family) {
        if (!in_array($judgment, $family)) {
            continue;
        }
        // Same family as the caller's language: prefer the caller's language.
        foreach ($family as $code) {
            if ($language == $code) {
                return $code;
            }
        }
        return $judgment;
    }

    // Languages outside both families (kor, ru, ara, ...) pass through unchanged.
    return $judgment;
}
/**
 * Self-test: run check() on a fixed set of words and report, per word, the
 * detected code, the expected code and whether they match.
 *
 * No caller language is supplied (null), so the raw detection result is
 * what gets compared against the expectation.
 *
 * @return array word => ['detected' => mixed, 'expected' => string, 'correct' => bool]
 */
public function test()
{
    $testWords = [
        'maíz' => 'spa',     // Spanish
        'oeuf' => 'fra',     // French
        'huevo' => 'spa',    // Spanish
        'ovo' => 'pt',       // Portuguese
        'fromage' => 'fra',  // French
        'queso' => 'spa',    // Spanish
        'queijo' => 'pt',    // Portuguese
        'crème' => 'fra',    // French
        'jalapeño' => 'spa', // Spanish
        'pão' => 'pt',       // Portuguese
        'Ei' => 'de',        // German
        'яйцо' => 'ru',      // Russian
        '卵' => 'jp',        // Japanese
        '蛋' => 'zh',        // Chinese
        '계란' => 'kor',     // Korean
        'بيض' => 'ara',      // Arabic
        'egg' => 'en',       // English
    ];

    $results = [];
    foreach ($testWords as $word => $expected) {
        // check() takes two arguments; pass the language explicitly.
        $detected = $this->check($word, null);
        $results[$word] = [
            'detected' => $detected,
            'expected' => $expected,
            'correct' => $detected === $expected,
        ];
    }

    return $results;
}
/**
 * Report how many dictionary words are registered for each supported language.
 *
 * @return array Internal language code => ['name' => display name, 'word_count' => int]
 */
public function getStats()
{
    // One pass over the dictionary: language code => number of words.
    $perLanguage = array_count_values($this->foodDictionary);

    $stats = [];
    foreach ($this->languageMap as $code => $info) {
        $stats[$code] = [
            'name' => $info[1],
            'word_count' => isset($perLanguage[$code]) ? $perLanguage[$code] : 0,
        ];
    }

    return $stats;
}
}