How to remove duplicate rows from a DataTable....

Someone at work asked this question so I spent five minutes and banged out this code. If it helps one person, it probably will help many so here it is....

--------- Demo Removing Duplicate rows in a DataTable ----------------

using System;

using System.Data;

using System.Collections;

 

namespace Demo_DataTableRemoveDupRows

{

      class Class1

      {

            [STAThread]

            static void Main(string[] args)

            {

 

                  // create an example datatable with duplicate rows

                  DataTable tbl = new DataTable();

 

                  tbl.Columns.Add("ColumnA");

                  tbl.Columns.Add("ColumnB");

                  tbl.Columns.Add("ColumnC");

                  for(int i = 0; i<10; i++)

                  {

                        DataRow nr = tbl.NewRow();

                        nr["ColumnA"] = "A" + i.ToString();

                        nr["ColumnB"] = "B" + i.ToString();

                        nr["ColumnC"] = "C" + i.ToString();

                        tbl.Rows.Add(nr);

                        // duplicate

                        nr = tbl.NewRow();

                        nr["ColumnA"] = "A" + i.ToString();

                        nr["ColumnB"] = "B" + i.ToString();

                        nr["ColumnC"] = "C" + i.ToString();

                        tbl.Rows.Add(nr);

                  }

 

                  PrintRows(tbl); // show table with duplicates

 

                  //Create an array of DataColumns to compare

                  //If these columns all match we consider the

                  //rows duplicate.

                  DataColumn[] keyColumns =

                              new DataColumn[]{tbl.Columns["ColumnA"],

                                               tbl.Columns["ColumnA"]};

 

                  //remove the duplicates

                  RemoveDuplicates(tbl, keyColumns);

 

 

                  PrintRows(tbl); // show table again

                  Console.ReadLine(); // pause to view output

 

            }

           

            /// <summary>

            /// Removes duplicate rows from given DataTable

            /// </summary>

            /// <param name="tbl">Table to scan for duplicate rows</param>

            /// <param name="KeyColumns">An array of DataColumns

            ///   containing the columns to match for duplicates</param>

            private static void RemoveDuplicates(DataTable tbl,

                                                 DataColumn[] keyColumns)

            {

                  int rowNdx = 0;

                  while(rowNdx < tbl.Rows.Count-1)

                  {

                        DataRow[] dups = FindDups(tbl, rowNdx, keyColumns);

                        if(dups.Length>0)

                        {

                              foreach(DataRow dup in dups)

                              {

                                    tbl.Rows.Remove(dup);

                              }

                        }

                        else

                        {

                              rowNdx++;

                        }

                  }

            }

 

            private static DataRow[] FindDups(DataTable tbl,

                                              int sourceNdx,

                                              DataColumn[] keyColumns)

            {

                  ArrayList retVal = new ArrayList();

 

                  DataRow sourceRow = tbl.Rows[sourceNdx];

                  for(int i=sourceNdx + 1; i<tbl.Rows.Count; i++)

                  {

                        DataRow targetRow = tbl.Rows[i];

                        if(IsDup(sourceRow, targetRow, keyColumns))

                        {

                              retVal.Add(targetRow);

                        }

                  }

                  return (DataRow[]) retVal.ToArray(typeof(DataRow));

            }

 

            private static bool IsDup(DataRow sourceRow,

                                      DataRow targetRow,

                                      DataColumn[] keyColumns)

            {

                  bool retVal = true;

                  foreach(DataColumn column in keyColumns)

                  {

                        retVal = retVal && sourceRow[column].Equals(targetRow[column]);

                        if(!retVal) break;

                  }

                  return retVal;

            }

 

            private static void PrintRows(DataTable tbl)

            {

                  for(int i=0; i<tbl.Rows.Count; i++)

                  {

                        Console.WriteLine("row: {0}, ColumnA: {1}, ColumnB: {2}", i, tbl.Rows[i]["ColumnA"], tbl.Rows[i]["ColumnB"]);

                  }

 

            }

      }

}

--------- End Demo Code ----------------

Hope it helps someone....

-Andy

Print | posted on Thursday, June 24, 2004 10:52 AM

Comments on this post

# re: How to remove duplicate rows from a DataTable....

Requesting Gravatar...
Thanks for this post. I'm moving a large amount of data to an in-memory cache. I was doing a SELECT DISTINCT before to remove the duplicates. This works great.
Left by Brian Scott on Aug 16, 2004 2:02 PM

# re: How to remove duplicate rows from a DataTable....

Requesting Gravatar...
Thankyou very much. been trying to do this for about 6 hours. started to feel like i was loosing my mind in double vb.net loops but your soloution does the trick perfectly.

cheers
Left by mark a on Feb 04, 2005 9:24 AM

# re: How to remove duplicate rows from a DataTable....

Requesting Gravatar...
Hi this code you posted has helped me solve a problem I had which I had also spent ages looking at. Many thanks

Gregor Suttie
Left by Gregor Suttie on May 02, 2005 7:22 AM

# re: How to remove duplicate rows from a DataTable....

Requesting Gravatar...
Fantastic! Spent hours trying to get this to work properly. Thanks a bunch!
Left by Joe P. on May 02, 2005 8:59 AM

# re: How to remove duplicate rows from a DataTable....

Requesting Gravatar...
Awesome post , you saved my life :)
Left by RonnyLiwuh on May 10, 2005 6:55 AM

# re: How to remove duplicate rows from a DataTable....

Requesting Gravatar...
Fantastic! I have Spent hours trying to get this to work properly. Thanks a lot
Left by Naveen Chainani on Jan 05, 2006 2:17 AM

# re: How to remove duplicate rows from a DataTable....

Requesting Gravatar...
Dear Mr.Programmer,I'm so thanksfull about posted code for C# programmer in metter of removing duplicate rows fron DataTable.
Left by Srdjan Markovic on Mar 09, 2006 11:11 PM

# re: How to remove duplicate rows from a DataTable....

Requesting Gravatar...
Nice code, THanks a lot!!!!!!!
Left by Rajesh on Mar 12, 2006 8:15 PM

# re: How to remove duplicate rows from a DataTable....

Requesting Gravatar...
Nice code, but 5 minutes? LOL!
Left by Bill Clinton on May 25, 2006 11:26 AM

# re: How to remove duplicate rows from a DataTable....

Requesting Gravatar...
How do you compare more than one column at a time?

I need to leave the row with most current date.

Here's the data:

\Product\Help\help.chm 06/23/2006 09:48 AM
\Product\Help\help.chm 08/23/2006 07:16 PM
\Product\Help\help.html 06/25/2006 08:31 AM
\Product\Help\help.html 08/25/2006 09:05 AM

( select name = name & Date > Date )
Left by Kevin on Aug 17, 2006 6:21 AM

# re: How to remove duplicate rows from a DataTable....

Requesting Gravatar...
Thank you very much. This is a big big help!
Left by Indera on Oct 25, 2006 6:01 AM

# re: How to remove duplicate rows from a DataTable....

Requesting Gravatar...
very good. But it's better if you have vb.net code :D
Thank you
Left by hieu on Dec 06, 2006 3:14 AM

# re: How to remove duplicate rows from a DataTable....

Requesting Gravatar...
Hi there,
You did a great job. I was looking for something similar.(I just wanted to create the list of duplicates and not remove them) I also had to rewrite the code in vb.net which was a breeze. I appreciate your putting on the web.
Saved me a lot of time.
thanks again

samir
Left by Samir on Dec 10, 2006 12:06 PM

# re: How to remove duplicate rows from a DataTable....

Requesting Gravatar...
Thank you.
Left by Malay Thakershi on Feb 08, 2007 12:00 PM

# re: How to remove duplicate rows from a DataTable....

Requesting Gravatar...
Ok. Thanks for the post, but here's a method that runs about 100 times faster and uses much less code. It employes a Dictionary for a uniqueness test. Just replace the RemoveDuplicates Method from the example above with this one, and create a list of column names by adding them to a List<string> generic collection to feed into the method. For Example,

List<string> keyColumns = new List<string>();
keyColumns.Add("ColumnA");
keyColumns.Add("ColumnB");

RemoveDuplicates(tbl,keyColumns);

private void RemoveDuplicates(DataTable table, List<string> keyColumns)
{
Dictionary<string, string> uniquenessDict = new Dictionary<string, string>(table.Rows.Count);
StringBuilder sb = null;
int rowIndex = 0;
DataRow row;
DataRowCollection rows = table.Rows;
while (rowIndex < rows.Count - 1)
{
row = rows[rowIndex];
sb = new StringBuilder();
foreach (string colname in keyColumns)
{
sb.Append(((string)row[colname]));
}

if (uniquenessDict.ContainsKey(sb.ToString()))
{
rows.Remove(row);
}
else
{
uniquenessDict.Add(sb.ToString(), string.Empty);
rowIndex++;
}
}
Left by Kevin Morris on Apr 07, 2007 2:29 PM

# re: How to remove duplicate rows from a DataTable....

Requesting Gravatar...
How do you remove triplicates?
Left by Norm on May 10, 2007 8:31 AM

# re: How to remove duplicate rows from a DataTable....

Requesting Gravatar...
Kevin, thaks a lot. Your code rocks.

Really fast, short, simple and very flexible.

That's the way it should be!

Thank you!
Left by Dan on May 18, 2007 2:16 PM

# re: How to remove duplicate rows from a DataTable....

Requesting Gravatar...
wow, amazing, this is what i need, i thought of several different ways but i think yours is faster.

Thank you (^_^)v
Left by silenr0c on Jul 06, 2008 9:32 PM

# re: How to remove duplicate rows from a DataTable....

Requesting Gravatar...
by the way, i made major modification to your idea, because i don't need to search every columns, i'll write in my blog later, please come by (^_^)
Left by silenr0c on Jul 07, 2008 2:05 AM

# re: How to remove duplicate rows from a DataTable....

Requesting Gravatar...
thanks kevin and Andy,good job, too fast...
Left by desper on Jul 24, 2008 12:08 PM

Your comment:

 (will show your gravatar)
 
Please add 5 and 3 and type the answer here: