要在现有的C#程序中添加功能,输出SQL Server数据血缘关系的三张表到Excel文件,我们需要进行以下几个步骤:
分析存储过程、视图和函数中的引用关系,构建数据血缘关系。
按依赖性从小到大排序表的顺序。
找出对应生成表的数据的存储过程。
将结果输出到Excel文件。
以下是完整的代码实现:
using Microsoft.SqlServer.TransactSql.ScriptDom;
using OfficeOpenXml;
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text.RegularExpressions;
class Program
{
static void Main()
{
string directoryPath = @"<搜索的目录路径>";
var dataRelations = new Dictionary<string, List<string>>();
var tableProcedures = new Dictionary<string, List<string>>();
var allTablesAndViews = new HashSet<string>();
ProcessSqlFiles(directoryPath, dataRelations, tableProcedures, allTablesAndViews);
var sortedTables = SortTablesByDependency(dataRelations, allTablesAndViews);
WriteToExcel(dataRelations, sortedTables, tableProcedures);
}
static void ProcessSqlFiles(string directoryPath, Dictionary<string, List<string>> dataRelations, Dictionary<string, List<string>> tableProcedures, HashSet<string> allTablesAndViews)
{
foreach (string filePath in Directory.EnumerateFiles(directoryPath, "*.sql", SearchOption.AllDirectories))
{
Console.WriteLine($"Processing file: {filePath}");
string sqlContent = File.ReadAllText(filePath);
ProcessSqlContent(sqlContent, dataRelations, tableProcedures, allTablesAndViews);
CheckCreateStatements(sqlContent, tableProcedures);
}
}
static void ProcessSqlContent(string sqlContent, Dictionary<string, List<string>> dataRelations, Dictionary<string, List<string>> tableProcedures, HashSet<string> allTablesAndViews)
{
TSql150Parser parser = new TSql150Parser(false);
IList<ParseError> errors;
TSqlFragment fragment = parser.Parse(new StringReader(sqlContent), out errors);
if (errors.Count == 0)
{
var referenceVisitor = new ReferenceVisitor(dataRelations, allTablesAndViews);
fragment.Accept(referenceVisitor);
}
else
{
foreach (var error in errors)
{
Console.WriteLine($"Parse error: {error.Message}");
}
}
}
static void CheckCreateStatements(string sqlContent, Dictionary<string, List<string>> tableProcedures)
{
// 匹配创建视图的语句
MatchCollection viewMatches = Regex.Matches(sqlContent, @"CREATE\s+VIEW\s+([^\s(]+)", RegexOptions.IgnoreCase);
foreach (Match match in viewMatches)
{
Console.WriteLine($"View: {match.Groups[1].Value}");
}
// 匹配创建存储过程的语句
MatchCollection sprocMatches = Regex.Matches(sqlContent, @"CREATE\s+PROC(?:EDURE)?\s+([^\s(]+)", RegexOptions.IgnoreCase);
foreach (Match match in sprocMatches)
{
Console.WriteLine($"Stored Procedure: {match.Groups[1].Value}");
tableProcedures[match.Groups[1].Value] = new List<string>();
}
// 匹配创建函数的语句
MatchCollection functionMatches = Regex.Matches(sqlContent, @"CREATE\s+(?:FUNCTION|AGGREGATE)\s+([^\s(]+)", RegexOptions.IgnoreCase);
foreach (Match match in functionMatches)
{
Console.WriteLine($"User Defined Function: {match.Groups[1].Value}");
}
}
static List<string> SortTablesByDependency(Dictionary<string, List<string>> dataRelations, HashSet<string> allTablesAndViews)
{
var sorted = new List<string>();
var visited = new HashSet<string>();
foreach (var table in allTablesAndViews)
{
TopologicalSort(table, dataRelations, sorted, visited);
}
return sorted;
}
static void TopologicalSort(string node, Dictionary<string, List<string>> dataRelations, List<string> sorted, HashSet<string> visited)
{
if (visited.Contains(node)) return;
visited.Add(node);
if (dataRelations.ContainsKey(node))
{
foreach (var child in dataRelations[node])
{
TopologicalSort(child, dataRelations, sorted, visited);
}
}
sorted.Insert(0, node);
}
static void WriteToExcel(Dictionary<string, List<string>> dataRelations, List<string> sortedTables, Dictionary<string, List<string>> tableProcedures)
{
using (var package = new ExcelPackage())
{
var worksheet1 = package.Workbook.Worksheets.Add("Data Relations");
worksheet1.Cells[1, 1].Value = "Table/View Name";
worksheet1.Cells[1, 2].Value = "Generation Path";
int row = 2;
foreach (var table in sortedTables)
{
worksheet1.Cells[row, 1].Value = table;
worksheet1.Cells[row, 2].Value = GetGenerationPath(table, dataRelations);
row++;
}
var worksheet2 = package.Workbook.Worksheets.Add("Sorted Tables");
worksheet2.Cells[1, 1].Value = "Table/View Name";
for (int i = 0; i < sortedTables.Count; i++)
{
worksheet2.Cells[i + 2, 1].Value = sortedTables[i];
}
var worksheet3 = package.Workbook.Worksheets.Add("Table - Procedure Mapping");
worksheet3.Cells[1, 1].Value = "Table/View Name";
worksheet3.Cells[1, 2].Value = "Stored Procedures";
row = 2;
foreach (var table in sortedTables)
{
worksheet3.Cells[row, 1].Value = table;
worksheet3.Cells[row, 2].Value = string.Join(", ", tableProcedures.GetValueOrDefault(table, new List<string>()));
row++;
}
FileInfo file = new FileInfo("DataBloodline.xlsx");
package.SaveAs(file);
}
}
static string GetGenerationPath(string table, Dictionary<string, List<string>> dataRelations)
{
if (!dataRelations.ContainsKey(table)) return table;
return table + "->" + string.Join("->", dataRelations[table].Select(t => GetGenerationPath(t, dataRelations)));
}
}
class ReferenceVisitor : TSqlFragmentVisitor
{
private readonly Dictionary<string, List<string>> dataRelations;
private readonly HashSet<string> allTablesAndViews;
public ReferenceVisitor(Dictionary<string, List<string>> dataRelations, HashSet<string> allTablesAndViews)
{
this.dataRelations = dataRelations;
this.allTablesAndViews = allTablesAndViews;
}
public override void Visit(SelectQuerySpecification node)
{
VisitTableReferences(node.FromClause);
}
public override void Visit(InsertStatement node)
{
AddReference(node.TableReference);
}
public override void Visit(UpdateStatement node)
{
AddReference(node.TableReference);
}
public override void Visit(DeleteStatement node)
{
AddReference(node.TableReference);
}
public override void Visit(ViewDefinition node)
{
AddReference(node.SchemaObjectName);
}
private void VisitTableReferences(FromClause fromClause)
{
if (fromClause!= null)
{
foreach (var tableReference in fromClause.TableReferences)
{
AddReference(tableReference);
}
}
}
private void AddReference(TSqlFragment fragment)
{
if (fragment is TableReference tableReference)
{
var name = tableReference.SchemaObject.BaseIdentifier.Value;
AddInfo(name);
}
else if (fragment is SchemaObjectName schemaObjectName)
{
var name = schemaObjectName.BaseIdentifier.Value;
AddInfo(name);
}
}
private void AddInfo(string name)
{
allTablesAndViews.Add(name);
if (!dataRelations.ContainsKey(name))
{
dataRelations[name] = new List<string>();
}
}
}
代码说明:
Main方法:初始化变量并调用 ProcessSqlFiles 方法,最后调用 WriteToExcel 方法将结果输出到Excel文件。
ProcessSqlFiles方法:遍历指定目录及其子目录下的所有 .sql 文件,并对每个文件的内容执行 ProcessSqlContent 和 CheckCreateStatements 方法。
ProcessSqlContent方法:使用 Microsoft.SqlServer.TransactSql.ScriptDom 库解析SQL内容,获取引用关系。
CheckCreateStatements方法:使用正则表达式匹配SQL内容中的创建视图、存储过程和函数的语句,并更新 tableProcedures 字典。
SortTablesByDependency方法:使用拓扑排序按依赖性从小到大排序表的顺序。
WriteToExcel方法:使用EPPlus库将数据血缘关系的三张表输出到Excel文件。
ReferenceVisitor类:继承自 TSqlFragmentVisitor ,用于收集SQL语句中的表和视图引用关系。
请确保你已经安装了EPPlus库,可以通过NuGet安装:
Install-Package EPPlus