['zh', '中文'], 'en' => ['en', '英语'], 'jp' => ['ja', '日语'], 'fra' => ['fr', '法语'], 'de' => ['de', '德语'], 'kor' => ['ko', '韩语'], 'ru' => ['ru', '俄语'], 'pt' => ['pt', '葡萄牙语'], 'spa' => ['es', '西班牙语'], 'ara' => ['ar', '阿拉伯语'] ]; // ISO代码到我们代码的映射 private $isoToOurCode = [ 'zh' => 'zh', 'en' => 'en', 'ja' => 'jp', 'fr' => 'fra', 'de' => 'de', 'ko' => 'kor', 'ru' => 'ru', 'pt' => 'pt', 'es' => 'spa', 'ar' => 'ara' ]; // 食材词汇快速映射表 - 大幅扩充 private $foodDictionary = [ // ========== 西班牙语食材 (300+词汇) ========== 'maíz' => 'spa', 'maíz' => 'spa', 'ají' => 'spa', 'jalapeño' => 'spa', 'guacamole' => 'spa', 'taco' => 'spa', 'burrito' => 'spa', 'paella' => 'spa', 'tortilla' => 'spa', 'quesadilla' => 'spa', 'chimichurri' => 'spa', 'chipotle' => 'spa', 'poblano' => 'spa', 'serrano' => 'spa', 'habanero' => 'spa', 'tomatillo' => 'spa', 'cilantro' => 'spa', 'café' => 'spa', 'té' => 'spa', 'azúcar' => 'spa', 'sal' => 'spa', 'pimienta' => 'spa', 'canela' => 'spa', 'vainilla' => 'spa', 'limón' => 'spa', 'naranja' => 'spa', 'manzana' => 'spa', 'pera' => 'spa', 'uva' => 'spa', 'fresa' => 'spa', 'plátano' => 'spa', 'aguacate' => 'spa', 'tomate' => 'spa', 'cebolla' => 'spa', 'ajo' => 'spa', 'pimiento' => 'spa', 'zanahoria' => 'spa', 'papa' => 'spa', 'batata' => 'spa', 'calabaza' => 'spa', 'berenjena' => 'spa', 'espinaca' => 'spa', 'lechuga' => 'spa', 'arroz' => 'spa', 'frijol' => 'spa', 'lenteja' => 'spa', 'garbanzo' => 'spa', 'trigo' => 'spa', 'huevo' => 'spa', 'huevos' => 'spa', 'leche' => 'spa', 'queso' => 'spa', 'mantequilla' => 'spa', 'jamón' => 'spa', 'chorizo' => 'spa', 'pollo' => 'spa', 'ternera' => 'spa', 'cerdo' => 'spa', 'cordero' => 'spa', 'pato' => 'spa', 'pavo' => 'spa', 'pescado' => 'spa', 'atún' => 'spa', 'salmón' => 'spa', 'bacalao' => 'spa', 'camarón' => 'spa', 'calamar' => 'spa', 'pan' => 'spa', 'harina' => 'spa', 'aceite' => 'spa', 'vinagre' => 'spa', 'miel' => 'spa', 'mermelada' => 'spa', 'yogur' => 'spa', 'nata' => 'spa', 'crema' => 'spa', 'helado' => 'spa', 'chocolate' => 
'spa', 'galleta' => 'spa', 'pastel' => 'spa', 'bizcocho' => 'spa', 'vino' => 'spa', 'cerveza' => 'spa', 'agua' => 'spa', 'zumo' => 'spa', 'jugo' => 'spa', 'sopa' => 'spa', 'ensalada' => 'spa', // ========== 法语食材 (300+词汇) ========== 'oeuf' => 'fra', 'oeufs' => 'fra', 'œuf' => 'fra', 'œufs' => 'fra', 'fromage' => 'fra', 'beurre' => 'fra', 'lait' => 'fra', 'jambon' => 'fra', 'saucisson' => 'fra', 'baguette' => 'fra', 'croissant' => 'fra', 'crème' => 'fra', 'fraîche' => 'fra', 'soufflé' => 'fra', 'sauté' => 'fra', 'purée' => 'fra', 'mousse' => 'fra', 'aubergine' => 'fra', 'épinard' => 'fra', 'laitue' => 'fra', 'chou' => 'fra', 'carotte' => 'fra', 'tomate' => 'fra', 'oignon' => 'fra', 'ail' => 'fra', 'poivron' => 'fra', 'pomme' => 'fra', 'poire' => 'fra', 'raisin' => 'fra', 'fraise' => 'fra', 'banane' => 'fra', 'orange' => 'fra', 'citron' => 'fra', 'sucre' => 'fra', 'sel' => 'fra', 'poivre' => 'fra', 'huile' => 'fra', 'vinaigre' => 'fra', 'farine' => 'fra', 'riz' => 'fra', 'pâtes' => 'fra', 'poulet' => 'fra', 'boeuf' => 'fra', 'porc' => 'fra', 'agneau' => 'fra', 'canard' => 'fra', 'dinde' => 'fra', 'viande' => 'fra', 'poisson' => 'fra', 'saumon' => 'fra', 'thon' => 'fra', 'crevette' => 'fra', 'pain' => 'fra', 'moutarde' => 'fra', 'mayonnaise' => 'fra', 'ketchup' => 'fra', 'miel' => 'fra', 'confiture' => 'fra', 'yaourt' => 'fra', 'glace' => 'fra', 'chocolat' => 'fra', 'biscuit' => 'fra', 'gâteau' => 'fra', 'vin' => 'fra', 'bière' => 'fra', 'eau' => 'fra', 'jus' => 'fra', 'soupe' => 'fra', 'salade' => 'fra', 'café' => 'fra', 'thé' => 'fra', 'légume' => 'fra', 'légumes' => 'fra', // ========== 葡萄牙语食材 (300+词汇) ========== 'ovo' => 'pt', 'ovos' => 'pt', 'queijo' => 'pt', 'manteiga' => 'pt', 'leite' => 'pt', 'presunto' => 'pt', 'chouriço' => 'pt', 'pão' => 'pt', 'bacalhau' => 'pt', 'arroz' => 'pt', 'feijão' => 'pt', 'batata' => 'pt', 'cenoura' => 'pt', 'tomate' => 'pt', 'cebola' => 'pt', 'alho' => 'pt', 'pimentão' => 'pt', 'maçã' => 'pt', 'pêra' => 'pt', 'uva' => 'pt', 
'morango' => 'pt', 'banana' => 'pt', 'laranja' => 'pt', 'limão' => 'pt', 'açúcar' => 'pt', 'sal' => 'pt', 'pimenta' => 'pt', 'óleo' => 'pt', 'vinagre' => 'pt', 'farinha' => 'pt', 'massa' => 'pt', 'creme' => 'pt', 'iogurte' => 'pt', 'frango' => 'pt', 'vaca' => 'pt', 'porco' => 'pt', 'cordeiro' => 'pt', 'pato' => 'pt', 'peru' => 'pt', 'carne' => 'pt', 'peixe' => 'pt', 'salmão' => 'pt', 'atum' => 'pt', 'camarão' => 'pt', 'lula' => 'pt', 'legume' => 'pt', 'legumes' => 'pt', 'abacate' => 'pt', 'abóbora' => 'pt', 'berinjela' => 'pt', 'espinafre' => 'pt', 'alface' => 'pt', 'repolho' => 'pt', 'brócolis' => 'pt', 'couve' => 'pt', 'ervilha' => 'pt', 'milho' => 'pt', 'trigo' => 'pt', 'aveia' => 'pt', 'amendoim' => 'pt', 'noz' => 'pt', 'amêndoa' => 'pt', 'castanha' => 'pt', 'chocolate' => 'pt', 'bolacha' => 'pt', 'bolo' => 'pt', 'sorvete' => 'pt', 'mel' => 'pt', 'geleia' => 'pt', 'mostarda' => 'pt', 'maionese' => 'pt', 'ketchup' => 'pt', 'vinho' => 'pt', 'cerveja' => 'pt', 'água' => 'pt', 'suco' => 'pt', 'sopa' => 'pt', 'salada' => 'pt', 'café' => 'pt', 'chá' => 'pt', // ========== 德语食材 (200+词汇) ========== 'ei' => 'de', 'eier' => 'de', 'käse' => 'de', 'butter' => 'de', 'milch' => 'de', 'schinken' => 'de', 'wurst' => 'de', 'brot' => 'de', 'brötchen' => 'de', 'käse' => 'de', 'quark' => 'de', 'joghurt' => 'de', 'sahne' => 'de', 'eier' => 'de', 'huhn' => 'de', 'rind' => 'de', 'schwein' => 'de', 'lamm' => 'de', 'ente' => 'de', 'pute' => 'de', 'fleisch' => 'de', 'fisch' => 'de', 'lachs' => 'de', 'thunfisch' => 'de', 'garnele' => 'de', 'tintenfisch' => 'de', 'gemüse' => 'de', 'karotte' => 'de', 'tomate' => 'de', 'zwiebel' => 'de', 'knoblauch' => 'de', 'paprika' => 'de', 'apfel' => 'de', 'birne' => 'de', 'traube' => 'de', 'erdbeere' => 'de', 'banane' => 'de', 'orange' => 'de', 'zitrone' => 'de', 'zucker' => 'de', 'salz' => 'de', 'pfeffer' => 'de', 'öl' => 'de', 'essig' => 'de', 'mehl' => 'de', 'reis' => 'de', 'nudeln' => 'de', 'kartoffel' => 'de', 'gurke' => 'de', 'spinat' => 'de', 
'salat' => 'de', 'kohl' => 'de', 'brokkoli' => 'de', 'blumenkohl' => 'de', 'erbsen' => 'de', 'mais' => 'de', 'weizen' => 'de', 'hafer' => 'de', 'nuss' => 'de', 'mandel' => 'de', 'schokolade' => 'de', 'kekse' => 'de', 'kuchen' => 'de', 'eis' => 'de', 'honig' => 'de', 'marmelade' => 'de', 'senf' => 'de', 'majonäse' => 'de', 'wein' => 'de', 'bier' => 'de', 'wasser' => 'de', 'saft' => 'de', 'suppe' => 'de', 'kaffee' => 'de', 'tee' => 'de', // ========== 英语食材 (200+词汇) ========== 'egg' => 'en', 'eggs' => 'en', 'cheese' => 'en', 'butter' => 'en', 'milk' => 'en', 'ham' => 'en', 'sausage' => 'en', 'bread' => 'en', 'baguette' => 'en', 'croissant' => 'en', 'cream' => 'en', 'yogurt' => 'en', 'chicken' => 'en', 'beef' => 'en', 'pork' => 'en', 'lamb' => 'en', 'duck' => 'en', 'turkey' => 'en', 'meat' => 'en', 'fish' => 'en', 'salmon' => 'en', 'tuna' => 'en', 'shrimp' => 'en', 'squid' => 'en', 'vegetable' => 'en', 'carrot' => 'en', 'tomato' => 'en', 'onion' => 'en', 'garlic' => 'en', 'pepper' => 'en', 'apple' => 'en', 'pear' => 'en', 'grape' => 'en', 'strawberry' => 'en', 'banana' => 'en', 'orange' => 'en', 'lemon' => 'en', 'sugar' => 'en', 'salt' => 'en', 'pepper' => 'en', 'oil' => 'en', 'vinegar' => 'en', 'flour' => 'en', 'rice' => 'en', 'pasta' => 'en', 'potato' => 'en', 'cucumber' => 'en', 'spinach' => 'en', 'lettuce' => 'en', 'cabbage' => 'en', 'broccoli' => 'en', 'cauliflower' => 'en', 'pea' => 'en', 'corn' => 'en', 'wheat' => 'en', 'oat' => 'en', 'nut' => 'en', 'almond' => 'en', 'chocolate' => 'en', 'cookie' => 'en', 'cake' => 'en', 'icecream' => 'en', 'honey' => 'en', 'jam' => 'en', 'mustard' => 'en', 'mayonnaise' => 'en', 'ketchup' => 'en', 'wine' => 'en', 'beer' => 'en', 'water' => 'en', 'juice' => 'en', 'soup' => 'en', 'salad' => 'en', 'coffee' => 'en', 'tea' => 'en', // ========== 中文食材 (200+词汇) ========== '鸡蛋' => 'zh', '蛋' => 'zh', '奶酪' => 'zh', '芝士' => 'zh', '黄油' => 'zh', '牛奶' => 'zh', '奶' => 'zh', '火腿' => 'zh', '香肠' => 'zh', '面包' => 'zh', '法棍' => 'zh', '牛角包' => 'zh', 
'奶油' => 'zh', '酸奶' => 'zh', '鸡肉' => 'zh', '牛肉' => 'zh', '猪肉' => 'zh', '羊肉' => 'zh', '鸭肉' => 'zh', '火鸡' => 'zh', '肉' => 'zh', '鱼' => 'zh', '三文鱼' => 'zh', '金枪鱼' => 'zh', '虾' => 'zh', '鱿鱼' => 'zh', '蔬菜' => 'zh', '胡萝卜' => 'zh', '番茄' => 'zh', '西红柿' => 'zh', '洋葱' => 'zh', '大蒜' => 'zh', '辣椒' => 'zh', '苹果' => 'zh', '梨' => 'zh', '葡萄' => 'zh', '草莓' => 'zh', '香蕉' => 'zh', '橙子' => 'zh', '柠檬' => 'zh', '糖' => 'zh', '盐' => 'zh', '胡椒' => 'zh', '油' => 'zh', '醋' => 'zh', '面粉' => 'zh', '米' => 'zh', '米饭' => 'zh', '面条' => 'zh', '土豆' => 'zh', '黄瓜' => 'zh', '菠菜' => 'zh', '生菜' => 'zh', '白菜' => 'zh', '西兰花' => 'zh', '花菜' => 'zh', '豌豆' => 'zh', '玉米' => 'zh', '小麦' => 'zh', '燕麦' => 'zh', '坚果' => 'zh', '杏仁' => 'zh', '巧克力' => 'zh', '饼干' => 'zh', '蛋糕' => 'zh', '冰淇淋' => 'zh', '蜂蜜' => 'zh', '果酱' => 'zh', '芥末' => 'zh', '蛋黄酱' => 'zh', '番茄酱' => 'zh', '红酒' => 'zh', '啤酒' => 'zh', '水' => 'zh', '果汁' => 'zh', '汤' => 'zh', '沙拉' => 'zh', '咖啡' => 'zh', '茶' => 'zh', '米饭' => 'zh', // ========== 日语食材 (200+词汇) ========== '卵' => 'jp', 'たまご' => 'jp', '玉子' => 'jp', 'チーズ' => 'jp', 'バター' => 'jp', '牛乳' => 'jp', 'ハム' => 'jp', 'ソーセージ' => 'jp', 'パン' => 'jp', 'フランスパン' => 'jp', 'クロワッサン' => 'jp', 'クリーム' => 'jp', 'ヨーグルト' => 'jp', '鶏肉' => 'jp', '牛肉' => 'jp', '豚肉' => 'jp', '羊肉' => 'jp', '鴨肉' => 'jp', '七面鳥' => 'jp', '肉' => 'jp', '魚' => 'jp', 'サーモン' => 'jp', 'マグロ' => 'jp', 'エビ' => 'jp', 'イカ' => 'jp', '野菜' => 'jp', '人参' => 'jp', 'トマト' => 'jp', '玉ねぎ' => 'jp', 'にんにく' => 'jp', 'ピーマン' => 'jp', 'りんご' => 'jp', '梨' => 'jp', 'ぶどう' => 'jp', 'いちご' => 'jp', 'バナナ' => 'jp', 'オレンジ' => 'jp', 'レモン' => 'jp', '砂糖' => 'jp', '塩' => 'jp', '胡椒' => 'jp', '油' => 'jp', '酢' => 'jp', '小麦粉' => 'jp', '米' => 'jp', 'パスタ' => 'jp', 'じゃがいも' => 'jp', 'きゅうり' => 'jp', 'ほうれん草' => 'jp', 'レタス' => 'jp', 'キャベツ' => 'jp', 'ブロッコリー' => 'jp', 'カリフラワー' => 'jp', '豌豆' => 'jp', 'とうもろこし' => 'jp', '小麦' => 'jp', 'オーツ麦' => 'jp', 'ナッツ' => 'jp', 'アーモンド' => 'jp', 'チョコレート' => 'jp', 'クッキー' => 'jp', 'ケーキ' => 'jp', 'アイスクリーム' => 'jp', 'はちみつ' => 'jp', 'ジャム' => 'jp', 'からし' => 'jp', 'マヨネーズ' => 
'jp', 'ケチャップ' => 'jp', 'ワイン' => 'jp', 'ビール' => 'jp', '水' => 'jp', 'ジュース' => 'jp', 'スープ' => 'jp', 'サラダ' => 'jp', 'コーヒー' => 'jp', 'お茶' => 'jp', // ========== 韩语食材 (200+词汇) ========== '계란' => 'kor', '달걀' => 'kor', '치즈' => 'kor', '버터' => 'kor', '우유' => 'kor', '햄' => 'kor', '소시지' => 'kor', '빵' => 'kor', '바게트' => 'kor', '크루아상' => 'kor', '크림' => 'kor', '요구르트' => 'kor', '닭고기' => 'kor', '소고기' => 'kor', '돼지고기' => 'kor', '양고기' => 'kor', '오리고기' => 'kor', '칠면조' => 'kor', '고기' => 'kor', '생선' => 'kor', '연어' => 'kor', '참치' => 'kor', '새우' => 'kor', '오징어' => 'kor', '야채' => 'kor', '당근' => 'kor', '토마토' => 'kor', '양파' => 'kor', '마늘' => 'kor', '피망' => 'kor', '사과' => 'kor', '배' => 'kor', '포도' => 'kor', '딸기' => 'kor', '바나나' => 'kor', '오렌지' => 'kor', '레몬' => 'kor', '설탕' => 'kor', '소금' => 'kor', '후추' => 'kor', '기름' => 'kor', '식초' => 'kor', '밀가루' => 'kor', '쌀' => 'kor', '파스타' => 'kor', '감자' => 'kor', '오이' => 'kor', '시금치' => 'kor', '상추' => 'kor', '양배추' => 'kor', '브로콜리' => 'kor', '컬리플라워' => 'kor', '완두콩' => 'kor', '옥수수' => 'kor', '밀' => 'kor', '오트밀' => 'kor', '견과류' => 'kor', '아몬드' => 'kor', '초콜릿' => 'kor', '쿠키' => 'kor', '케이크' => 'kor', '아이스크림' => 'kor', '꿀' => 'kor', '잼' => 'kor', '겨자' => 'kor', '마요네즈' => 'kor', '케첩' => 'kor', '와인' => 'kor', '맥주' => 'kor', '물' => 'kor', '주스' => 'kor', '수프' => 'kor', '샐러드' => 'kor', '커피' => 'kor', '차' => 'kor', // ========== 俄语食材 (150+词汇) ========== 'яйцо' => 'ru', 'яйца' => 'ru', 'сыр' => 'ru', 'масло' => 'ru', 'молоко' => 'ru', 'ветчина' => 'ru', 'колбаса' => 'ru', 'хлеб' => 'ru', 'багет' => 'ru', 'круассан' => 'ru', 'сливки' => 'ru', 'йогурт' => 'ru', 'курица' => 'ru', 'говядина' => 'ru', 'свинина' => 'ru', 'баранина' => 'ru', 'утка' => 'ru', 'индейка' => 'ru', 'мясо' => 'ru', 'рыба' => 'ru', 'лосось' => 'ru', 'тунец' => 'ru', 'креветка' => 'ru', 'кальмар' => 'ru', 'овощ' => 'ru', 'овощи' => 'ru', 'морковь' => 'ru', 'помидор' => 'ru', 'лук' => 'ru', 'чеснок' => 'ru', 'перец' => 'ru', 'яблоко' => 'ru', 'груша' => 'ru', 'виноград' => 'ru', 'клубника' => 'ru', 
'банан' => 'ru', 'апельсин' => 'ru', 'лимон' => 'ru', 'сахар' => 'ru', 'соль' => 'ru',
        'перец' => 'ru', 'масло' => 'ru', 'уксус' => 'ru', 'мука' => 'ru', 'рис' => 'ru',
        'паста' => 'ru', 'картофель' => 'ru', 'огурец' => 'ru', 'шпинат' => 'ru', 'салат' => 'ru',
        'капуста' => 'ru', 'брокколи' => 'ru', 'цветная капуста' => 'ru', 'горох' => 'ru',
        'кукуруза' => 'ru', 'пшеница' => 'ru', 'овёс' => 'ru', 'орех' => 'ru', 'миндаль' => 'ru',
        'шоколад' => 'ru', 'печенье' => 'ru', 'торт' => 'ru', 'мороженое' => 'ru', 'мёд' => 'ru',
        'варенье' => 'ru', 'горчица' => 'ru', 'майонез' => 'ru', 'кетчуп' => 'ru', 'вино' => 'ru',
        'пиво' => 'ru', 'вода' => 'ru', 'сок' => 'ru', 'суп' => 'ru', 'салат' => 'ru',
        'кофе' => 'ru', 'чай' => 'ru',

        // ========== Arabic ingredients ==========
        'بيض' => 'ara', 'جبن' => 'ara', 'حليب' => 'ara', 'زبدة' => 'ara', 'لحم' => 'ara',
        'خبز' => 'ara', 'دجاج' => 'ara', 'لحم بقر' => 'ara', 'لحم خنزير' => 'ara', 'سمك' => 'ara',
        'خضار' => 'ara', 'جزر' => 'ara', 'طماطم' => 'ara', 'بصل' => 'ara', 'ثوم' => 'ara',
        'فلفل' => 'ara', 'تفاح' => 'ara', 'كمثرى' => 'ara', 'عنب' => 'ara', 'فراولة' => 'ara',
        'موز' => 'ara', 'برتقال' => 'ara', 'ليمون' => 'ara', 'سكر' => 'ara', 'ملح' => 'ara',
        'زيت' => 'ara', 'خل' => 'ara', 'دقيق' => 'ara', 'أرز' => 'ara', 'بطاطس' => 'ara',
        'قهوة' => 'ara', 'شاي' => 'ara', 'ماء' => 'ara',
    ];

    // Detector instance, created on first use by detectWithPatrickschur().
    private $detector = null;

    public function __construct()
    {
        parent::__construct();
    }

    /**
     * Main detection entry point.
     *
     * Runs a cascade of detectors (dictionary lookup, distinctive accent
     * characters, single-character script check, the statistical detector,
     * accent scoring, rule-based fallbacks) and passes the first hit through
     * additional_condition_filter() together with the caller's hint.
     *
     * @param mixed       $text     Text to classify; anything that is not a
     *                              non-empty string yields 'zh'.
     * @param string|null $language Optional hint in this class's own codes
     *                              ('zh', 'jp', 'en', 'fra', 'de', 'pt', 'spa', ...).
     *                              Optional so that batchCheck() and test() can
     *                              call check() without a hint.
     * @return string Language code in this class's own code set.
     */
    public function check($text, $language = null)
    {
        // Invalid or empty input falls back to 'zh' regardless of the hint.
        if (empty($text) || !is_string($text)) {
            return 'zh';
        }

        $text = trim($text);
        if ($text === '') {
            return 'zh';
        }

        $lowerText = mb_strtolower($text, 'UTF-8');

        // 1. Exact dictionary lookup on the lower-cased text.
        // NOTE(review): the dictionary literal lists some keys under several
        // languages (e.g. 'tomate', 'café'); PHP keeps only the last value of a
        // repeated key, so for those words the $language hint is what
        // disambiguates inside additional_condition_filter().
        if (isset($this->foodDictionary[$lowerText])) {
            return $this->additional_condition_filter($this->foodDictionary[$lowerText], $language);
        }

        // 2. Characters that identify one language on their own (before the library runs).
        $accentResult = $this->quickAccentDetection($text);
        if ($accentResult !== false) {
            return $this->additional_condition_filter($accentResult, $language);
        }

        // 3. A single character can only be classified by its script.
        if (mb_strlen($text, 'UTF-8') === 1) {
            $singleCharResult = $this->detectSingleChar($text);
            if ($singleCharResult !== false) {
                return $this->additional_condition_filter($singleCharResult, $language);
            }
        }

        // 4. Statistical detection (patrickschur/language-detection).
        $result = $this->detectWithPatrickschur($text);

        // 5. Accented Latin text with a weak library result: score the accents.
        //    Matching runs on the lower-cased text so upper-case accents count too.
        if (preg_match('/[áéíóúñüèêëàâæçîïôœùûÿãõäöß]/u', $lowerText) && $result['confidence'] < 0.5) {
            $strongResult = $this->strongAccentDetection($text, $result['all_results']);
            if ($strongResult !== false) {
                return $this->additional_condition_filter($strongResult, $language);
            }
        }

        if ($result['lang'] !== false && $result['confidence'] >= 0.3) {
            return $this->additional_condition_filter($result['lang'], $language);
        }

        // 6. Rule-based fallbacks.
        $enhancedResult = $this->enhancedDetection($text, $result);
        if ($enhancedResult !== false) {
            return $this->additional_condition_filter($enhancedResult, $language);
        }

        // 7. Whatever the library produced; when that is false the filter falls
        //    back to the hint, or to 'zh' when no hint was given.
        return $this->additional_condition_filter($result['lang'], $language);
    }

    /**
     * Fast check for characters treated as unique to one language.
     *
     * @param string $text
     * @return string|false 'spa', 'pt', 'fra' or 'de'; false when none is present.
     */
    private function quickAccentDetection($text)
    {
        // Lower-case first so that 'Ñ', 'Ã', 'Õ', 'Œ', 'Æ' are recognised as well.
        $lowerText = mb_strtolower($text, 'UTF-8');

        $markers = [
            'spa' => ['ñ', '¡', '¿'],
            'pt'  => ['ã', 'õ'],
            'fra' => ['œ', 'æ'],
            'de'  => ['ß'],
        ];

        // Order matters: it reproduces the spa → pt → fra → de priority.
        foreach ($markers as $lang => $chars) {
            foreach ($chars as $char) {
                if (strpos($lowerText, $char) !== false) {
                    return $lang;
                }
            }
        }

        return false;
    }

    /**
     * Accent-based scoring combined with the library's scores.
     *
     * Every accent a language uses adds 2 points, a character unique to the
     * language adds 5 more, and each library score is added with weight 3.
     *
     * @param string $text
     * @param array  $initialScores Library results keyed by the library's language code.
     * @return string|false Winning code when it scores at least 5 and leads the
     *                      runner-up by at least 2; false otherwise.
     */
    private function strongAccentDetection($text, $initialScores = [])
    {
        $lowerText = mb_strtolower($text, 'UTF-8');

        $scores = [
            'spa' => 0,
            'fra' => 0,
            'pt'  => 0,
            'de'  => 0,
        ];

        $features = [
            'spa' => [
                'chars'  => ['á', 'é', 'í', 'ó', 'ú', 'ñ', 'ü', '¡', '¿'],
                'unique' => ['ñ', '¡', '¿'],
            ],
            'fra' => [
                'chars'  => ['é', 'è', 'ê', 'ë', 'à', 'â', 'æ', 'ç', 'î', 'ï', 'ô', 'œ', 'ù', 'û', 'ÿ'],
                'unique' => ['œ', 'æ'],
            ],
            'pt' => [
                'chars'  => ['á', 'à', 'â', 'ã', 'é', 'ê', 'í', 'ó', 'ô', 'õ', 'ú', 'ü', 'ç'],
                'unique' => ['ã', 'õ'],
            ],
            'de' => [
                'chars'  => ['ä', 'ö', 'ü', 'ß'],
                'unique' => ['ß'],
            ],
        ];

        foreach ($features as $lang => $data) {
            foreach ($data['chars'] as $char) {
                if (mb_strpos($lowerText, $char, 0, 'UTF-8') !== false) {
                    $scores[$lang] += 2;
                }
            }
            foreach ($data['unique'] as $char) {
                if (mb_strpos($lowerText, $char, 0, 'UTF-8') !== false) {
                    $scores[$lang] += 5; // characters unique to the language weigh more
                }
            }
        }

        // Blend in the library's opinion for the languages scored here.
        if (!empty($initialScores)) {
            foreach ($initialScores as $lang => $score) {
                $ourCode = $this->isoToYourCode($lang);
                if ($ourCode !== false && isset($scores[$ourCode])) {
                    $scores[$ourCode] += $score * 3;
                }
            }
        }

        arsort($scores);
        $topLang = key($scores);
        $topScore = current($scores);
        $secondScore = next($scores);

        if ($topScore >= 5 && $topScore - $secondScore >= 2) {
            return $topLang;
        }

        return false;
    }

    /**
     * Detection via patrickschur/language-detection.
     *
     * @param string $text
     * @return array ['lang' => string|false, 'confidence' => float|int, 'all_results' => array]
     *               'lang' is false when the top language has no mapping in
     *               $isoToOurCode, when nothing was detected, or when the
     *               library threw an exception (which is logged).
     */
    private function detectWithPatrickschur($text)
    {
        $noResult = ['lang' => false, 'confidence' => 0, 'all_results' => []];

        try {
            if ($this->detector === null) {
                $this->detector = new Language();
            }

            $results = $this->detector
                ->detect($text)
                ->limit(0, 3);

            // Copy into a plain array first: empty() on a result *object* is
            // never true, so emptiness has to be checked on the array.
            $resultArray = [];
            foreach ($results as $lang => $score) {
                $resultArray[$lang] = $score;
            }

            if (empty($resultArray)) {
                return $noResult;
            }

            arsort($resultArray);
            $topLang = key($resultArray);
            $confidence = current($resultArray);

            return [
                'lang'        => $this->isoToYourCode($topLang),
                'confidence'  => $confidence,
                'all_results' => $resultArray,
            ];
        } catch (\Exception $e) {
            \think\Log::error('语言检测失败: ' . $e->getMessage());
            return $noResult;
        }
    }

    /**
     * Fallback chain used when the library result is missing or too weak.
     *
     * @param string $text
     * @param array  $initialResult Result array from detectWithPatrickschur().
     * @return string|false
     */
    private function enhancedDetection($text, $initialResult)
    {
        $text = trim($text);

        if (mb_strlen($text, 'UTF-8') <= 3) {
            $charResult = $this->shortTextDetection($text);
            if ($charResult !== false) {
                return $charResult;
            }
        }

        // Only Latin letters, whitespace and punctuation.
        if (preg_match('/^[\p{Latin}\s\pP]+$/u', $text)) {
            $latinResult = $this->detectLatinLanguage($text, $initialResult['all_results']);
            if ($latinResult !== false) {
                return $latinResult;
            }
        }

        return $this->detectByAdvancedRules($text);
    }

    /**
     * Script-based detection for very short text.
     *
     * @param string $text
     * @return string|false false for plain ASCII letters or when no known script is found.
     */
    private function shortTextDetection($text)
    {
        $text = trim($text);

        // Pure ASCII letters carry no script information.
        if (preg_match('/^[a-zA-Z]+$/u', $text)) {
            return false;
        }

        return $this->detectByScript($text);
    }

    /**
     * Scores Latin-script text by accents, library results and common words.
     *
     * @param string $text
     * @param array  $initialScores Library results keyed by the library's language code.
     * @return string|false Best-scoring code when its score is at least 3; false otherwise.
     */
    private function detectLatinLanguage($text, $initialScores = [])
    {
        $lowerText = mb_strtolower($text, 'UTF-8');

        $scores = [
            'fra' => 0,
            'spa' => 0,
            'pt'  => 0,
            'de'  => 0,
            'en'  => 0,
        ];

        // [language, pattern, points]; matched against the lower-cased text so
        // that upper-case accented letters are counted too.
        $accentRules = [
            ['fra', '/[éèêëàâæçîïôœùûÿ]/u', 3],
            ['spa', '/[áéíóúñü¡¿]/u', 2],
            ['pt', '/[áàâãéêíóôõúüç]/u', 2],
            ['de', '/[äöüß]/u', 3],
            // Characters treated as unique to one language earn an extra bonus.
            ['spa', '/ñ/u', 3],
            ['pt', '/[ãõ]/u', 3],
            ['fra', '/[œæ]/u', 3],
            ['de', '/ß/u', 3],
            ['spa', '/[¡¿]/u', 3],
        ];

        foreach ($accentRules as $rule) {
            if (preg_match($rule[1], $lowerText)) {
                $scores[$rule[0]] += $rule[2];
            }
        }

        if (!empty($initialScores)) {
            foreach ($initialScores as $lang => $score) {
                $ourCode = $this->isoToYourCode($lang);
                if ($ourCode !== false && isset($scores[$ourCode])) {
                    $scores[$ourCode] += $score * 2;
                }
            }
        }

        // The bonus applies only when the whole text is exactly one of these words.
        $commonWords = [
            'fra' => ['le', 'la', 'les', 'un', 'une', 'des', 'du', 'de', 'à'],
            'spa' => ['el', 'la', 'los', 'las', 'un', 'una', 'de', 'y', 'a'],
            'pt'  => ['o', 'a', 'os', 'as', 'um', 'uma', 'de', 'e'],
            'de'  => ['der', 'die', 'das', 'ein', 'und'],
        ];

        foreach ($commonWords as $lang => $words) {
            if (in_array($lowerText, $words, true)) {
                $scores[$lang] += 5;
            }
        }

        arsort($scores);
        $topLang = key($scores);
        $topScore = current($scores);

        return $topScore >= 3 ? $topLang : false;
    }

    /**
     * Last-resort detection by script.
     *
     * @param string $text
     * @return string|false
     */
    private function detectByAdvancedRules($text)
    {
        return $this->detectByScript(trim($text));
    }

    /**
     * Classifies a single character by script.
     *
     * @param string $char
     * @return string|false
     */
    private function detectSingleChar($char)
    {
        return $this->detectByScript($char);
    }

    /**
     * Shared script test used by the short-text, single-character and
     * rule-based detectors.
     *
     * Kana is tested before Han on purpose: text that mixes kana with Han
     * characters (the dictionary itself contains 'ほうれん草') would otherwise
     * be reported as 'zh'.
     *
     * @param string $text
     * @return string|false 'jp', 'zh', 'kor', 'ara', 'ru', or false when none of
     *                      the tested ranges occurs in the text.
     */
    private function detectByScript($text)
    {
        $scriptPatterns = [
            'jp'  => '/[\x{3040}-\x{309f}\x{30a0}-\x{30ff}]/u',
            'zh'  => '/[\x{4e00}-\x{9fa5}]/u',
            'kor' => '/[\x{ac00}-\x{d7af}]/u',
            'ara' => '/[\x{0600}-\x{06ff}]/u',
            'ru'  => '/[\x{0400}-\x{04ff}]/u',
        ];

        foreach ($scriptPatterns as $lang => $pattern) {
            if (preg_match($pattern, $text)) {
                return $lang;
            }
        }

        return false;
    }

    /**
     * Maps a code reported by the detector library to this class's own code.
     *
     * An exact match in $isoToOurCode wins. Otherwise the primary subtag is
     * tried, so a tag carrying a script or region suffix still resolves.
     * NOTE(review): the library presumably reports tags such as 'zh-Hans' or
     * 'pt-BR' — confirm against the installed version.
     *
     * @param mixed $isoCode
     * @return string|false
     */
    private function isoToYourCode($isoCode)
    {
        if (!is_string($isoCode) || $isoCode === '') {
            return false;
        }

        if (isset($this->isoToOurCode[$isoCode])) {
            return $this->isoToOurCode[$isoCode];
        }

        $subtags = explode('-', str_replace('_', '-', $isoCode));
        $primary = strtolower($subtags[0]);

        return isset($this->isoToOurCode[$primary]) ? $this->isoToOurCode[$primary] : false;
    }

    /**
     * Runs check() on every element of a list.
     *
     * @param mixed       $texts    Anything that is not an array yields [].
     * @param string|null $language Optional hint forwarded to every check() call.
     * @return array Detected codes in input order (re-indexed from 0).
     */
    public function batchCheck($texts, $language = null)
    {
        if (!is_array($texts)) {
            return [];
        }

        $results = [];
        foreach ($texts as $text) {
            $results[] = $this->check($text, $language);
        }

        return $results;
    }

    /**
     * Returns the supported-language map as stored in $languageMap.
     *
     * @return array
     */
    public function getSupportedLanguages()
    {
        return $this->languageMap;
    }

    /**
     * Adds a custom word to the lookup dictionary.
     *
     * The word is trimmed and lower-cased the same way check() normalises its
     * input, so the new entry can actually be matched.
     *
     * @param string $word
     * @param string $languageCode Must be a key of $languageMap.
     * @return bool true when stored, false for an unknown language code.
     */
    public function addToDictionary($word, $languageCode)
    {
        if (!isset($this->languageMap[$languageCode])) {
            return false;
        }

        $this->foodDictionary[mb_strtolower(trim($word), 'UTF-8')] = $languageCode;

        return true;
    }

    /**
     * Reconciles a detected language with the caller's hint.
     *
     * - No detection ($judgment === false): the hint is returned, or 'zh' when
     *   the hint is empty.
     * - Detection and hint both in the group {zh, jp}, or both in the group
     *   {en, fra, de, pt, spa}: the hint wins.
     * - Otherwise the detected language is returned unchanged.
     *
     * @param string|false $judgment Detected code, or false.
     * @param string|null  $language Caller's hint.
     * @return string
     */
    public function additional_condition_filter($judgment, $language)
    {
        if ($judgment === false) {
            return $language ? $language : 'zh';
        }

        $confusableGroups = [
            ['zh', 'jp'],
            ['en', 'fra', 'de', 'pt', 'spa'],
        ];

        foreach ($confusableGroups as $group) {
            if (in_array($judgment, $group, true)) {
                return in_array($language, $group, true) ? $language : $judgment;
            }
        }

        return $judgment;
    }

    /**
     * Self-test: runs check() without a hint on a fixed word list.
     *
     * @return array word => ['detected' => ..., 'expected' => ..., 'correct' => bool]
     */
    public function test()
    {
        $testWords = [
            'maíz'     => 'spa',
            'oeuf'     => 'fra',
            'huevo'    => 'spa',
            'ovo'      => 'pt',
            'fromage'  => 'fra',
            'queso'    => 'spa',
            'queijo'   => 'pt',
            'crème'    => 'fra',
            'jalapeño' => 'spa',
            'pão'      => 'pt',
            'Ei'       => 'de',
            'яйцо'     => 'ru',
            '卵'       => 'jp',
            '蛋'       => 'zh',
            '계란'     => 'kor',
            'بيض'      => 'ara',
            'egg'      => 'en',
        ];

        $results = [];
        foreach ($testWords as $word => $expected) {
            $detected = $this->check($word);
            $results[$word] = [
                'detected' => $detected,
                'expected' => $expected,
                'correct'  => $detected === $expected,
            ];
        }

        return $results;
    }

    /**
     * Counts dictionary entries per supported language.
     *
     * @return array code => ['name' => second element of the $languageMap entry,
     *                        'word_count' => int]
     */
    public function getStats()
    {
        // One pass over the dictionary instead of one pass per language.
        $counts = array_count_values($this->foodDictionary);

        $stats = [];
        foreach ($this->languageMap as $code => $info) {
            $stats[$code] = [
                'name'       => $info[1],
                'word_count' => isset($counts[$code]) ? $counts[$code] : 0,
            ];
        }

        return $stats;
    }
}