This commit is contained in:
张成
2026-04-08 17:27:40 +08:00
parent f2a8e61016
commit 51bbdacdda
5 changed files with 201 additions and 80 deletions

View File

@@ -8,6 +8,19 @@ const db = require('../../dbProxy');
* 自动投递只需调 filterAndScoreJobsForDeliver 一个方法。
*/
class JobFilterEngine {
getJobKey(job) {
return String(job.id || job.jobId || `${job.companyName || ''}|${job.jobTitle || ''}`);
}
getRemovedTitles(beforeJobs, afterJobs, limit = 5) {
const keptKeySet = new Set(afterJobs.map((job) => this.getJobKey(job)));
return beforeJobs
.filter((job) => !keptKeySet.has(this.getJobKey(job)))
.map((job) => job.jobTitle || '')
.filter(Boolean)
.slice(0, limit);
}
/**
* 过滤职位列表(薪资 → 标题须含词 → 关键词 → 活跃度 → 去重)
* @param {Array} jobs - 职位列表
@@ -23,50 +36,56 @@ class JobFilterEngine {
let filtered = [...jobs];
// 1. 薪资过滤
const beforeSalary = filtered.length;
const beforeSalaryJobs = [...filtered];
filtered = this.filterBySalary(filtered, config);
const salaryRemoved = beforeSalary - filtered.length;
const salaryRemoved = beforeSalaryJobs.length - filtered.length;
if (salaryRemoved > 0) {
console.log(`[jobFilterEngine] 步骤1-薪资过滤: 输入${beforeSalary} 输出${filtered.length} 剔除${salaryRemoved} (范围: ${config.min_salary ?? 0}-${config.max_salary ?? 0}K)`);
const removedTitles = this.getRemovedTitles(beforeSalaryJobs, filtered);
console.log(`[jobFilterEngine] 步骤1-薪资过滤: 范围=${config.min_salary ?? 0}-${config.max_salary ?? 0}K 剔除标题=${removedTitles.join(' | ') || '无'}`);
}
// 2. 职位标题须包含job_types.titleIncludeKeywords仅 jobTitle/jobName/name,与 commonSkills 无关)
const beforeTitleKw = filtered.length;
// 2. 职位标题须包含job_types.titleIncludeKeywords仅 jobTitle与 commonSkills 无关)
const beforeTitleFilterJobs = [...filtered];
filtered = this.filterByTitleIncludeKeywords(filtered, config);
const titleKwRemoved = beforeTitleKw - filtered.length;
const titleKwRemoved = beforeTitleFilterJobs.length - filtered.length;
if (titleKwRemoved > 0) {
console.log(`[jobFilterEngine] 步骤2-标题须含: 输入${beforeTitleKw} 输出${filtered.length} 剔除${titleKwRemoved} (须同时含: ${(config.title_include_keywords || []).join(' · ') || '无'})`);
const removedTitles = this.getRemovedTitles(beforeTitleFilterJobs, filtered);
console.log(`[jobFilterEngine] 步骤2-标题须含: 关键词=[${(config.title_include_keywords || []).join('、') || '无'}] 剔除标题=${removedTitles.join(' | ') || '无'}`);
}
// 3. 关键词过滤(排除词 + filter_keywords匹配标题与行业等
const beforeKeywords = filtered.length;
const beforeKeywordFilterJobs = [...filtered];
filtered = this.filterByKeywords(filtered, config);
const keywordsRemoved = beforeKeywords - filtered.length;
const keywordsRemoved = beforeKeywordFilterJobs.length - filtered.length;
if (keywordsRemoved > 0) {
console.log(`[jobFilterEngine] 步骤3-关键词过滤: 输入${beforeKeywords} 输出${filtered.length} 剔除${keywordsRemoved} (排除: ${(config.exclude_keywords || []).join(',') || '无'} 包含: ${(config.filter_keywords || []).join(',') || '无'})`);
const removedTitles = this.getRemovedTitles(beforeKeywordFilterJobs, filtered);
console.log(`[jobFilterEngine] 步骤3-关键词过滤: 排除=[${(config.exclude_keywords || []).join('、') || '无'}] 包含=[${(config.filter_keywords || []).join('、') || '无'}] 剔除标题=${removedTitles.join(' | ') || '无'}`);
}
// 4. 公司活跃度过滤
if (config.filter_inactive_companies) {
const beforeActivity = filtered.length;
const beforeActivityJobs = [...filtered];
filtered = await this.filterByCompanyActivity(filtered, config.company_active_days || 7);
const activityRemoved = beforeActivity - filtered.length;
const activityRemoved = beforeActivityJobs.length - filtered.length;
if (activityRemoved > 0) {
console.log(`[jobFilterEngine] 步骤4-公司活跃度过滤: 输入${beforeActivity} 输出${filtered.length} 剔除${activityRemoved}`);
const removedTitles = this.getRemovedTitles(beforeActivityJobs, filtered);
console.log(`[jobFilterEngine] 步骤4-公司活跃度过滤: 剔除标题=${removedTitles.join(' | ') || '无'}`);
}
}
// 5. 去重(同一公司、同一职位名称)
if (config.deduplicate) {
const beforeDedup = filtered.length;
const beforeDedupJobs = [...filtered];
filtered = this.deduplicateJobs(filtered);
const dedupRemoved = beforeDedup - filtered.length;
const dedupRemoved = beforeDedupJobs.length - filtered.length;
if (dedupRemoved > 0) {
console.log(`[jobFilterEngine] 步骤5-去重: 输入${beforeDedup} 输出${filtered.length} 剔除${dedupRemoved}`);
const removedTitles = this.getRemovedTitles(beforeDedupJobs, filtered);
console.log(`[jobFilterEngine] 步骤5-去重: 剔除标题=${removedTitles.join(' | ') || '无'}`);
}
}
console.log(`[jobFilterEngine] filterJobs 结束: 原始${jobs.length} 通过${filtered.length} 总剔除${jobs.length - filtered.length}`);
const keptTitles = filtered.map((j) => j.jobTitle || '').filter(Boolean).slice(0, 5);
console.log(`[jobFilterEngine] filterJobs 结束: 通过标题=${keptTitles.join(' | ') || '无'}`);
return filtered;
}
@@ -143,7 +162,7 @@ class JobFilterEngine {
}
/**
* 职位标题须包含配置中的每个子串AND 关系),不扫描描述/公司名/commonSkills
* 职位标题须包含配置中的关键词(命中任意一个即通过),不扫描描述/公司名/commonSkills
* @param {Array} jobs
* @param {object} config
* @returns {Array}
@@ -154,11 +173,11 @@ class JobFilterEngine {
return jobs;
}
return jobs.filter((job) => {
const title = `${job.jobTitle || job.jobName || job.name || ''}`.toLowerCase();
return kws.every((kw) => {
const title = `${job.jobTitle || ''}`.toLowerCase();
return kws.some((kw) => {
const k = String(kw || '').toLowerCase().trim();
if (!k) {
return true;
return false;
}
return title.includes(k);
});
@@ -251,7 +270,7 @@ class JobFilterEngine {
for (const job of jobs) {
const company = (job.company || job.companyName || '').toLowerCase().trim();
const jobName = (job.name || job.jobName || '').toLowerCase().trim();
const jobName = (job.jobTitle || '').toLowerCase().trim();
const key = `${company}||${jobName}`;
if (!seen.has(key)) {
@@ -334,8 +353,8 @@ class JobFilterEngine {
}
const jobText = [
job.name || job.jobName || '',
job.description || job.jobDescription || '',
job.jobTitle || '',
job.jobDescription || '',
job.skills || ''
].join(' ');