Files
autoAiWorkSys/api/middleware/schedule/utils/keywordMatcher.js
张成 65833dd32d 11
2025-12-30 15:46:18 +08:00

226 lines
7.0 KiB
JavaScript
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
/**
 * Keyword matching utilities.
 * Provides keyword matching and scoring for job-description text.
 */
class KeywordMatcher {
  /**
   * Reduce a keyword argument to the entries that can actually be searched for.
   *
   * - A bare string is treated as a single keyword (iterating it would
   *   otherwise yield one "keyword" per character).
   * - Any other iterable (Array, Set, ...) is accepted.
   * - Non-string and blank entries are dropped.
   *
   * Entries are returned untrimmed so callers can report them exactly as configured.
   *
   * @param {string[]|Iterable<string>|string|null|undefined} keywords - Raw keyword argument
   * @returns {string[]} Usable keywords in their original order
   */
  static normalizeKeywords(keywords) {
    let candidates;
    if (typeof keywords === 'string') {
      candidates = [keywords];
    } else if (keywords && typeof keywords[Symbol.iterator] === 'function') {
      candidates = Array.from(keywords);
    } else {
      return [];
    }
    return candidates.filter(
      (keyword) => typeof keyword === 'string' && keyword.trim() !== ''
    );
  }

  /**
   * Collect the keywords that occur in the text as substrings.
   *
   * Keywords are trimmed before searching; the returned entries are the
   * original (untrimmed) keyword strings.
   *
   * @param {string} text - Text to search; anything but a non-empty string yields no matches
   * @param {string[]} keywords - Keyword list (normalized via normalizeKeywords)
   * @param {boolean} [caseSensitive=false] - Whether the comparison is case-sensitive
   * @returns {string[]} Keywords found in the text, in list order
   */
  static collectMatches(text, keywords, caseSensitive = false) {
    if (typeof text !== 'string' || text === '') {
      return [];
    }
    const haystack = caseSensitive ? text : text.toLowerCase();
    return KeywordMatcher.normalizeKeywords(keywords).filter((keyword) => {
      const needle = caseSensitive ? keyword.trim() : keyword.trim().toLowerCase();
      return haystack.includes(needle);
    });
  }

  /**
   * Check whether the text contains any exclude keyword (case-insensitive).
   * @param {string} text - Text to check
   * @param {string[]} excludeKeywords - Exclude keyword list
   * @returns {{matched: boolean, keywords: string[]}} Match result
   */
  static matchExcludeKeywords(text, excludeKeywords = []) {
    // Referenced by class name (not `this`) so the method keeps working when
    // it is destructured or passed around as a callback.
    const hits = KeywordMatcher.collectMatches(text, excludeKeywords);
    return {
      matched: hits.length > 0,
      keywords: hits
    };
  }

  /**
   * Check whether the text contains at least one filter (required) keyword.
   *
   * Empty text never matches. An empty filter list, or one that contains only
   * blank / non-string entries, imposes no requirement and therefore matches.
   *
   * @param {string} text - Text to check
   * @param {string[]} filterKeywords - Filter keyword list
   * @returns {{matched: boolean, keywords: string[], matchCount: number}} Match result
   */
  static matchFilterKeywords(text, filterKeywords = []) {
    if (typeof text !== 'string' || text === '') {
      return { matched: false, keywords: [], matchCount: 0 };
    }
    const required = KeywordMatcher.normalizeKeywords(filterKeywords);
    if (required.length === 0) {
      return { matched: true, keywords: [], matchCount: 0 };
    }
    const hits = KeywordMatcher.collectMatches(text, required);
    // Matching at least one filter keyword is enough to pass.
    return {
      matched: hits.length > 0,
      keywords: hits,
      matchCount: hits.length
    };
  }

  /**
   * Compute the bonus score earned by matched keywords.
   * @param {string} text - Text to check
   * @param {string[]} keywords - Keyword list
   * @param {object} options - Options
   * @param {number} [options.baseScore=10] - Score per matched keyword
   * @param {number} [options.maxBonus=50] - Upper bound for the total bonus
   * @param {boolean} [options.caseSensitive=false] - Whether matching is case-sensitive
   * @returns {{score: number, matchedKeywords: string[], matchCount: number}}
   */
  static calculateBonus(text, keywords = [], options = {}) {
    // `options || {}` also covers an explicit null, which a parameter default does not.
    const {
      baseScore = 10,
      maxBonus = 50,
      caseSensitive = false
    } = options || {};
    const hits = KeywordMatcher.collectMatches(text, keywords, caseSensitive);
    return {
      score: Math.min(hits.length * baseScore, maxBonus),
      matchedKeywords: hits,
      matchCount: hits.length
    };
  }

  /**
   * Wrap every keyword occurrence in the text with markers (for display).
   *
   * All keywords are matched in a single pass, so the inserted markers are
   * never re-scanned (a keyword such as "mark" cannot corrupt "<mark>") and
   * overlapping keywords do not produce nested markers.
   *
   * @param {string} text - Original text
   * @param {string[]} keywords - Keyword list
   * @param {string} prefix - Opening marker (default <mark>)
   * @param {string} suffix - Closing marker (default </mark>)
   * @returns {string} Highlighted text (the input is returned unchanged when
   *   there is nothing to highlight)
   */
  static highlight(text, keywords = [], prefix = '<mark>', suffix = '</mark>') {
    if (typeof text !== 'string' || text === '') {
      return text;
    }
    const terms = KeywordMatcher.normalizeKeywords(keywords).map((keyword) => keyword.trim());
    if (terms.length === 0) {
      return text;
    }
    // Longest first: regex alternation takes the first alternative that
    // matches, so "javascript" must come before "java".
    const ordered = [...new Set(terms)].sort((a, b) => b.length - a.length);
    const pattern = new RegExp(
      ordered.map((term) => this.escapeRegex(term)).join('|'),
      'gi'
    );
    // A replacer function inserts prefix/suffix literally; a replacement
    // string would interpret sequences such as "$&" or "$1" inside them.
    return text.replace(pattern, (hit) => `${prefix}${hit}${suffix}`);
  }

  /**
   * Escape regular-expression special characters.
   * @param {string} str - String to escape
   * @returns {string} Escaped string, safe to embed in a RegExp source
   */
  static escapeRegex(str) {
    return str.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  }

  /**
   * Combined matching (exclude + filter + bonus).
   *
   * The text is rejected when it contains any exclude keyword, or when
   * filter keywords are configured and none of them occurs in the text.
   * Otherwise it passes with the bonus score of the matched bonus keywords.
   *
   * @param {string} text - Text to check
   * @param {object} config - Configuration
   * @param {string[]} config.excludeKeywords - Exclude keywords
   * @param {string[]} config.filterKeywords - Filter keywords
   * @param {string[]} config.bonusKeywords - Bonus keywords
   * @returns {{pass: boolean, reason?: string, score: number, details: object}}
   */
  static match(text, config = {}) {
    // `config || {}` tolerates an explicit null config; null keyword lists
    // are handled by the individual matchers.
    const { excludeKeywords, filterKeywords, bonusKeywords } = config || {};

    // 1. Exclude keywords
    const excludeResult = this.matchExcludeKeywords(text, excludeKeywords);
    if (excludeResult.matched) {
      return {
        pass: false,
        reason: `包含排除关键词: ${excludeResult.keywords.join(', ')}`,
        score: 0,
        details: { exclude: excludeResult }
      };
    }

    // 2. Filter keywords (at least one must match). A list with no usable
    //    entries, e.g. [''] produced by ''.split(','), means "no filter".
    const filterResult = this.matchFilterKeywords(text, filterKeywords);
    const hasFilter = KeywordMatcher.normalizeKeywords(filterKeywords).length > 0;
    if (hasFilter && !filterResult.matched) {
      return {
        pass: false,
        reason: '不包含任何必需关键词',
        score: 0,
        details: { filter: filterResult }
      };
    }

    // 3. Bonus score
    const bonusResult = this.calculateBonus(text, bonusKeywords);
    return {
      pass: true,
      score: bonusResult.score,
      details: {
        exclude: excludeResult,
        filter: filterResult,
        bonus: bonusResult
      }
    };
  }

  /**
   * Match a list of jobs and keep the ones that pass.
   * @param {Array} jobs - Job list (any iterable; null/undefined entries are skipped)
   * @param {object} config - Match configuration (see match)
   * @param {Function} textExtractor - Text extractor (job) => string
   * @returns {Array} Shallow copies of the passing jobs, each with the match
   *   result attached as `_matchInfo`
   */
  static filterJobs(jobs, config, textExtractor = (job) => `${job.name || ''} ${job.description || ''}`) {
    if (!jobs || typeof jobs[Symbol.iterator] !== 'function') {
      return [];
    }
    const filtered = [];
    for (const job of jobs) {
      // The default extractor dereferences the job, so skip empty slots.
      if (job === null || job === undefined) continue;
      const matchResult = this.match(textExtractor(job), config);
      if (matchResult.pass) {
        filtered.push({
          ...job,
          _matchInfo: matchResult
        });
      }
    }
    return filtered;
  }
}
// Expose the KeywordMatcher class as this module's CommonJS export.
module.exports = KeywordMatcher;