Analyzing column sizes in csv files
I had this problem with a rather big CSV file: I needed to find the maximum length of each column in the file. This could probably be solved rather easily with Microsoft Excel, but it is much more fun to write a program. It is rather crude, but it works.
Download the CSV-checker (executable)
Download source-code (visual studio 2005)
Command line syntax
CSV-checker.exe filepath [delimiter]
Program output
----------- FILE ANALYZED -----------
Column nr | Column name | Max length
-------------------------------------
1 | Columname | 2
2 | Columname | 37
3 | Columname | 3
4 | Columname | 44
-------------------------------------
Nr of columns: 4
Nr of rows: 2701
Nr of chars: 2117584
using System;
using System.Collections.Generic;
using System.Text;
namespace CSV_checker
{
    /// <summary>
    /// Command line tool that reports, for every column of a delimited text
    /// file, the column name (taken from the first line) and the length of the
    /// longest value found in the remaining lines.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Program entry point.
        /// </summary>
        /// <param name="args">
        /// args[0]: path of the file to analyze (required).
        /// args[1]: single-character field delimiter (optional, defaults to ';').
        /// A second argument that is not exactly one character long is ignored.
        /// </param>
        static void Main(string[] args)
        {
            try
            {
                // check arguments
                if (args.Length < 1)
                {
                    Console.WriteLine("Missing file argument");
                    Environment.Exit(-1);
                }
                else if (!System.IO.File.Exists(args[0]))
                {
                    Console.WriteLine("That file does not exist");
                    Environment.Exit(-1);
                }

                // set delimiter
                char delimiter = ';';
                if (args.Length > 1 && args[1].Length == 1)
                {
                    delimiter = args[1][0];
                }
                // Built once and reused for every Split call below.
                char[] separators = new char[] { delimiter };

                // read file
                string[] lines = System.IO.File.ReadAllLines(args[0]);
                if (lines.Length == 0)
                {
                    // Without a header line there is nothing to analyze.
                    Console.WriteLine("That file is empty");
                    Environment.Exit(-1);
                }

                // The first line is the header and defines the initial column count.
                int nrColumns = lines[0].Split(separators).Length;
                int[] maxColumns = new int[nrColumns];
                string[] columnNames = new string[nrColumns];
                long nrChars = 0;

                // analyze file
                for (int i = 0; i < lines.Length; i++)
                {
                    // Split on the selected delimiter, not a hard-coded ';'.
                    string[] parts = lines[i].Split(separators);

                    // A row may have more fields than the header. Grow the
                    // result arrays so the extra columns are reported (with an
                    // empty name) instead of aborting the whole analysis.
                    if (parts.Length > nrColumns)
                    {
                        Array.Resize(ref maxColumns, parts.Length);
                        Array.Resize(ref columnNames, parts.Length);
                        nrColumns = parts.Length;
                    }

                    for (int j = 0; j < parts.Length; j++)
                    {
                        // Count the characters of the field itself (delimiters
                        // and line breaks are not included in the total).
                        nrChars += parts[j].Length;

                        if (i == 0)
                        {
                            // Header row: remember the name; it does not take
                            // part in the max-length calculation.
                            columnNames[j] = parts[j];
                        }
                        else if (parts[j].Length > maxColumns[j])
                        {
                            maxColumns[j] = parts[j].Length;
                        }
                    }
                }

                // Print results
                Console.WriteLine("----------- FILE ANALYZED -----------");
                Console.WriteLine("Column nr | Column name | Max length");
                Console.WriteLine("-------------------------------------");
                for (int i = 0; i < nrColumns; i++)
                {
                    Console.WriteLine((i + 1) + " | " + columnNames[i] + " | " + maxColumns[i]);
                }
                Console.WriteLine("-------------------------------------");
                Console.WriteLine("Nr of columns: " + nrColumns);
                Console.WriteLine("Nr of rows: " + lines.Length);
                Console.WriteLine("Nr of chars: " + nrChars);
            }
            catch (Exception ex)
            {
                // Last-resort handler (e.g. I/O errors while reading the file).
                Console.WriteLine("ERROR: " + ex.Message);
            }
        }
    }
}


