Files
autoAiWorkSys/_script/import_company_info_direct.js
张成 4686a24522 1
2025-12-12 16:38:48 +08:00

235 lines
7.1 KiB
JavaScript
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
const Framework = require('../framework/node-core-framework.js');
const frameworkConfig = require('../config/framework.config.js');
const fs = require('fs');
const path = require('path');
/**
* 直接导入公司信息到数据库
* 逐条插入,出错时跳过并继续
*/
/**
 * Extract the province-level region and the district from a registered address.
 *
 * Address shapes are tried in this order:
 *   1. Municipality "XX市XX区…"  e.g. "上海市宝山区…"      -> { province: "上海", city: "宝山区" }
 *   2. Province "XX省XX市XX区…"  e.g. "广东省深圳市南山区…" -> { province: "广东", city: "南山区" }
 *   3. Fallback: only the leading province-level unit is extracted and
 *      `city` stays empty, e.g. "内蒙古自治区呼和浩特市…" -> { province: "内蒙古", city: "" }
 *
 * Despite its name, `city` holds the district (区), e.g. "宝山区", not the city.
 *
 * @param {string} address - Registered address text.
 * @returns {{ province: string, city: string }} Both fields are '' when the
 *   address is empty, not a string, or matches none of the patterns.
 */
function extractProvinceAndCity(address) {
  if (!address || typeof address !== 'string') {
    return { province: '', city: '' };
  }

  // Direct-administered municipalities: "上海市宝山区…".
  const municipalityMatch = address.match(/^(上海|北京|天津|重庆)市(.+?区)/);
  if (municipalityMatch) {
    return { province: municipalityMatch[1], city: municipalityMatch[2] };
  }

  // Ordinary provinces: "XX省XX市XX区…". The city part is matched but not returned.
  const provinceMatch = address.match(/^(.+?省)(.+?市)(.+?区)/);
  if (provinceMatch) {
    return {
      province: provinceMatch[1].replace('省', ''),
      city: provinceMatch[3],
    };
  }

  // Fallback: take the text before the EARLIEST province-level suffix.
  // The lazy prefix tests every suffix at each position, so "内蒙古自治区呼和浩特市"
  // stops at "自治区" instead of running on to the later "市"; capturing the
  // prefix separately also avoids stripping the wrong suffix afterwards.
  const fallbackMatch = address.match(/^(.+?)(?:省|市|自治区|特别行政区)/);
  return {
    province: fallbackMatch ? fallbackMatch[1] : '',
    city: '',
  };
}
/**
 * Read the tab-separated company list and convert each data row into a plain
 * object ready to be passed to the company_info model.
 *
 * The file is read synchronously from `../_doc/公司xinxi.md` relative to this
 * script. The first line is treated as a header and skipped. Columns, in
 * order: sequence number, stock code, company name, registered address,
 * phone, email, website.
 *
 * Rows are skipped when they are blank, have fewer than 7 tab-separated
 * columns, or have an empty company name (the name is the key used for
 * de-duplication during import).
 *
 * @returns {Array<Object>} One record per accepted row. Empty optional fields
 *   become null; `is_listed` is 1 when a stock code is present, otherwise 0.
 * @throws {Error} Whatever `fs.readFileSync` throws if the file cannot be read.
 */
function parseCompanyData() {
  const filePath = path.join(__dirname, '../_doc/公司xinxi.md');
  const content = fs.readFileSync(filePath, 'utf-8');
  const companies = [];

  // Drop the header row; accept both LF and CRLF line endings.
  const rows = content.split(/\r?\n/).slice(1);

  for (const row of rows) {
    if (!row.trim()) continue;

    // Split BEFORE trimming: trimming the whole line would strip leading or
    // trailing tabs, shifting columns or dropping rows whose last columns
    // are empty.
    const fields = row.split('\t').map((field) => field.trim());
    if (fields.length < 7) continue;

    const [
      sequenceNumber,
      stockCode,
      companyName,
      registeredAddress,
      phone,
      email,
      website,
    ] = fields;

    if (!companyName) continue;

    const { province, city } = extractProvinceAndCity(registeredAddress);

    // A non-numeric sequence number is stored as null rather than NaN.
    const parsedSequence = Number.parseInt(sequenceNumber, 10);

    companies.push({
      sequence_number: Number.isNaN(parsedSequence) ? null : parsedSequence,
      stock_code: stockCode || null,
      company_name: companyName,
      registered_address: registeredAddress || null,
      province: province || null,
      city: city || null,
      phone: phone || null,
      email: email || null,
      website: website || null,
      // Having a stock code is what marks a company as listed.
      is_listed: stockCode ? 1 : 0,
      recommendation_level: 'normal',
      is_enabled: 1,
    });
  }

  return companies;
}
/**
 * Insert one parsed company, or refresh the region of an already stored one.
 *
 * A record is looked up by `company_name`. When none exists the company is
 * created. When one exists and the parsed `city` is non-empty and differs
 * from the stored one, `province` and `city` are updated; otherwise nothing
 * is written.
 *
 * @param {Object} companyModel - Model exposing findOne / create / update.
 * @param {Object} company - Record produced by parseCompanyData().
 * @returns {Promise<'inserted'|'updated'|'skipped'>} What was done.
 */
async function importSingleCompany(companyModel, company) {
  const existing = await companyModel.findOne({
    where: { company_name: company.company_name },
  });

  if (!existing) {
    await companyModel.create(company);
    return 'inserted';
  }

  if (company.city && company.city !== existing.city) {
    await companyModel.update(
      { province: company.province, city: company.city },
      { where: { company_name: company.company_name } }
    );
    return 'updated';
  }

  return 'skipped';
}

/**
 * Script entry point: parse the company file, initialise the framework, then
 * import the companies one by one, logging progress and a final summary.
 *
 * A failure on a single record is logged and counted, and the loop continues.
 * A failure outside the per-record loop (parsing, framework init, missing
 * model) is logged and the process exits with code 1, after the database
 * connection has been closed if one was obtained.
 *
 * @returns {Promise<void>}
 */
async function main() {
  let framework = null;
  let company_info = null;
  let failed = false;

  try {
    console.log('🚀 开始导入公司信息数据...\n');

    console.log('📖 正在解析数据文件...');
    const companies = parseCompanyData();
    const total = companies.length;
    console.log(`✅ 成功解析 ${total} 条公司信息\n`);

    console.log('🔌 正在连接数据库...');
    framework = await Framework.init(frameworkConfig);
    const models = Framework.getModels();
    if (!models) {
      throw new Error('无法获取模型列表');
    }
    company_info = models.company_info;
    if (!company_info) {
      throw new Error('无法获取 company_info 模型');
    }
    console.log('✅ 数据库连接成功\n');

    // Each outcome has its own counter so the summary does not have to
    // derive "skipped" by subtraction (which would also count updates).
    let successCount = 0;
    let updatedCount = 0;
    let skippedCount = 0;
    let errorCount = 0;
    const errors = [];

    console.log('📝 开始插入数据...\n');
    for (const [i, company] of companies.entries()) {
      const index = i + 1;
      const position = `[${index}/${total}]`;

      try {
        const outcome = await importSingleCompany(company_info, company);

        if (outcome === 'updated') {
          updatedCount++;
          console.log(`🔄 ${position} 更新区域: ${company.company_name} -> ${company.city}`);
        } else if (outcome === 'skipped') {
          skippedCount++;
          console.log(`⏭️ ${position} 跳过已存在: ${company.company_name}`);
        } else {
          successCount++;
          // Progress is reported after an insert on every 10th record and on the last one.
          if (index % 10 === 0 || index === total) {
            console.log(`✅ ${position} 已插入 ${successCount} 条,失败 ${errorCount} 条`);
          }
        }
      } catch (error) {
        // Record the failure and keep going with the next company.
        errorCount++;
        const errorMsg = `${position} ${company.company_name}: ${error.message}`;
        errors.push(errorMsg);
        console.log(errorMsg);

        // Warn on every 50th error, starting with the 50th.
        if (errorCount % 50 === 0) {
          console.log(`⚠️ 警告: 已累计 ${errorCount} 个错误`);
        }
      }

      // Brief pause every 100 records to ease the load on the database.
      if (index % 100 === 0) {
        await new Promise((resolve) => setTimeout(resolve, 100));
      }
    }

    console.log('\n' + '='.repeat(60));
    console.log('📊 导入完成统计');
    console.log('='.repeat(60));
    console.log(`总数量: ${total}`);
    console.log(`✅ 成功: ${successCount}`);
    console.log(`🔄 更新: ${updatedCount}`);
    console.log(`❌ 失败: ${errorCount}`);
    console.log(`⏭️ 跳过: ${skippedCount} 条(已存在)`);

    if (errors.length > 0) {
      console.log('\n❌ 错误详情(前 20 条):');
      errors.slice(0, 20).forEach((err) => console.log(` - ${err}`));
      if (errors.length > 20) {
        console.log(` ... 还有 ${errors.length - 20} 个错误未显示`);
      }
    }
    console.log('\n✨ 导入任务完成!');
  } catch (error) {
    console.error('\n❌ 导入失败:', error);
    console.error(error.stack);
    // Do not exit here: process.exit() would end the process before the
    // finally block gets a chance to close the connection.
    failed = true;
  } finally {
    if (framework && company_info && company_info.sequelize) {
      try {
        await company_info.sequelize.close();
        console.log('\n🔌 数据库连接已关闭');
      } catch (error) {
        console.error('关闭数据库连接时出错:', error.message);
      }
    }
  }

  if (failed) {
    process.exit(1);
  }
}
// Run the import; if main() rejects, report the error and exit with code 1.
main().catch(function onFatalError(error) {
  console.error('程序执行失败:', error);
  process.exit(1);
});