const Framework = require('../framework/node-core-framework.js');
const frameworkConfig = require('../config/framework.config.js');
const fs = require('fs');
const path = require('path');

/**
 * Imports company records from a tab-separated document straight into the
 * database. Rows are processed one at a time; a failing row is logged and
 * skipped so that the remaining rows are still imported.
 */

// "<municipality>市<district>区…", e.g. "上海市宝山区…".
const MUNICIPALITY_PATTERN = /^(上海|北京|天津|重庆)市(.+?区)/;
// "<province>省<city>市<district>区…" or "<region>自治区<city>市<district>区…".
const PROVINCE_DISTRICT_PATTERN = /^(.+?)(?:省|自治区)(.+?市)(.+?区)/;
// Province-level prefix only. The lazy prefix followed by an alternation of
// suffixes selects the SHORTEST prefix that is followed by any suffix, so
// "内蒙古自治区呼和浩特市" yields "内蒙古" rather than running on to the city.
const PROVINCE_ONLY_PATTERN = /^(.+?)(?:省|自治区|特别行政区|市)/;

// A data row must provide at least this many tab-separated columns.
const MIN_COLUMNS = 7;
// Pause briefly after every THROTTLE_EVERY rows to ease database load.
const THROTTLE_EVERY = 100;
const THROTTLE_MS = 100;

/**
 * Extracts the province and the district from a registered address.
 *
 * Note that the returned `city` actually holds the district ("区"), e.g.
 * "宝山区", not the prefecture-level city.
 *
 * @param {string} address - Registered address; may be empty or nullish.
 * @returns {{province: string, city: string}} Province name without its
 *   administrative suffix, and the district; either is '' when not found.
 */
function extractProvinceAndCity(address) {
  if (!address) return { province: '', city: '' };

  // Direct-administered municipality: "上海市宝山区" -> { "上海", "宝山区" }.
  const municipality = address.match(MUNICIPALITY_PATTERN);
  if (municipality) {
    return { province: municipality[1], city: municipality[2] };
  }

  // Province / autonomous region followed by a city and a district.
  const withDistrict = address.match(PROVINCE_DISTRICT_PATTERN);
  if (withDistrict) {
    return { province: withDistrict[1], city: withDistrict[3] };
  }

  // Only a province-level prefix is recognizable; the district stays empty.
  const provinceOnly = address.match(PROVINCE_ONLY_PATTERN);
  return { province: provinceOnly ? provinceOnly[1] : '', city: '' };
}

/**
 * Reads the company document and turns every data row into a record ready to
 * be passed to the `company_info` model.
 *
 * The first line is treated as a header. Blank lines and rows with fewer than
 * seven tab-separated columns are skipped.
 *
 * @returns {Array<Object>} Parsed company records.
 * @throws {Error} Propagates the file-system error if the file cannot be read.
 */
function parseCompanyData() {
  const filePath = path.join(__dirname, '../_doc/公司xinxi.md');
  const content = fs.readFileSync(filePath, 'utf-8');
  const lines = content.split(/\r?\n/);

  const companies = [];

  // Skip the header (first line).
  for (const rawLine of lines.slice(1)) {
    if (!rawLine.trim()) continue;

    // Split the RAW line: trimming it first would strip leading/trailing tabs,
    // which shifts columns or drops rows whose last columns are empty.
    const parts = rawLine.split('\t').map((field) => field.trim());
    if (parts.length < MIN_COLUMNS) continue;

    const [
      sequenceNumber,
      stockCode,
      companyName,
      registeredAddress,
      phone,
      email,
      website,
    ] = parts;

    const { province, city } = extractProvinceAndCity(registeredAddress);

    // A non-numeric (or empty) sequence number is stored as null, never NaN.
    const parsedSequence = Number.parseInt(sequenceNumber, 10);

    companies.push({
      sequence_number: Number.isNaN(parsedSequence) ? null : parsedSequence,
      stock_code: stockCode || null,
      company_name: companyName,
      registered_address: registeredAddress || null,
      province: province || null,
      city: city || null,
      phone: phone || null,
      email: email || null,
      website: website || null,
      // A company with a stock code is considered listed.
      is_listed: stockCode ? 1 : 0,
      recommendation_level: 'normal',
      is_enabled: 1,
    });
  }

  return companies;
}

/**
 * Inserts one company, or refreshes the region of an already stored company
 * (matched by company name) when the parsed district differs.
 *
 * @param {Object} companyModel - The `company_info` model (findOne/create/update).
 * @param {Object} company - Record produced by `parseCompanyData`.
 * @returns {Promise<'inserted'|'updated'|'skipped'>} What was done for the row.
 */
async function importCompany(companyModel, company) {
  const where = { company_name: company.company_name };
  const existing = await companyModel.findOne({ where });

  if (!existing) {
    await companyModel.create(company);
    return 'inserted';
  }

  if (company.city && company.city !== existing.city) {
    await companyModel.update(
      { province: company.province, city: company.city },
      { where }
    );
    return 'updated';
  }

  return 'skipped';
}

/**
 * Entry point: parses the data file, connects to the database, imports every
 * row and prints a summary. Exits the process with code 1 on a fatal error,
 * after the database connection has been closed.
 *
 * @returns {Promise<void>}
 */
async function main() {
  let framework = null;
  let company_info = null;
  let failed = false;

  try {
    console.log('🚀 开始导入公司信息数据...\n');

    // Parse the data file.
    console.log('📖 正在解析数据文件...');
    const companies = parseCompanyData();
    console.log(`✅ 成功解析 ${companies.length} 条公司信息\n`);

    // Initialize the framework (database connection and models).
    console.log('🔌 正在连接数据库...');
    framework = await Framework.init(frameworkConfig);
    const models = Framework.getModels();

    if (!models) {
      throw new Error('无法获取模型列表');
    }

    company_info = models.company_info;

    if (!company_info) {
      throw new Error('无法获取 company_info 模型');
    }

    console.log('✅ 数据库连接成功\n');

    // Per-outcome counters for the final summary.
    let successCount = 0;
    let updatedCount = 0;
    let skippedCount = 0;
    let errorCount = 0;
    const errors = [];

    console.log('📝 开始插入数据...\n');

    for (const [i, company] of companies.entries()) {
      const index = i + 1;

      try {
        const outcome = await importCompany(company_info, company);

        if (outcome === 'updated') {
          updatedCount++;
          console.log(`🔄 [${index}/${companies.length}] 更新区域: ${company.company_name} -> ${company.city}`);
        } else if (outcome === 'skipped') {
          skippedCount++;
          console.log(`⏭️ [${index}/${companies.length}] 跳过已存在: ${company.company_name}`);
        } else {
          successCount++;

          // Report progress every 10 rows and on the last row.
          if (index % 10 === 0 || index === companies.length) {
            console.log(`✅ [${index}/${companies.length}] 已插入 ${successCount} 条,失败 ${errorCount} 条`);
          }
        }
      } catch (error) {
        errorCount++;
        const errorMsg = `[${index}/${companies.length}] ${company.company_name}: ${error.message}`;
        errors.push(errorMsg);

        // Show the error but keep going with the next row.
        console.log(`❌ ${errorMsg}`);

        // Warn periodically once errors pile up.
        if (errorCount > 50 && errorCount % 50 === 0) {
          console.log(`⚠️ 警告: 已累计 ${errorCount} 个错误`);
        }
      }

      // Throttle regardless of the row's outcome to avoid overloading the database.
      if (index % THROTTLE_EVERY === 0) {
        await new Promise((resolve) => setTimeout(resolve, THROTTLE_MS));
      }
    }

    // Final summary.
    console.log(`\n${'='.repeat(60)}`);
    console.log('📊 导入完成统计');
    console.log('='.repeat(60));
    console.log(`总数量: ${companies.length}`);
    console.log(`✅ 成功: ${successCount} 条`);
    console.log(`🔄 更新: ${updatedCount} 条`);
    console.log(`❌ 失败: ${errorCount} 条`);
    console.log(`⏭️ 跳过: ${skippedCount} 条(已存在)`);

    if (errors.length > 0) {
      console.log('\n❌ 错误详情(前 20 条):');
      errors.slice(0, 20).forEach((err) => console.log(`  - ${err}`));
      if (errors.length > 20) {
        console.log(`  ... 还有 ${errors.length - 20} 个错误未显示`);
      }
    }

    console.log('\n✨ 导入任务完成!');
  } catch (error) {
    // Do not call process.exit() here: it would terminate the process before
    // the finally block gets a chance to close the database connection.
    failed = true;
    console.error('\n❌ 导入失败:', error);
    console.error(error.stack);
  } finally {
    // Close the database connection.
    if (framework && company_info && company_info.sequelize) {
      try {
        await company_info.sequelize.close();
        console.log('\n🔌 数据库连接已关闭');
      } catch (error) {
        console.error('关闭数据库连接时出错:', error.message);
      }
    }
  }

  // Force a non-zero exit only after cleanup, so lingering handles cannot keep
  // a failed run alive.
  if (failed) {
    process.exit(1);
  }
}

// Run the import.
main().catch((error) => {
  console.error('程序执行失败:', error);
  process.exit(1);
});