How to remove duplicate rows from a DataTable....

Someone at work asked this question so I spent five minutes and banged out this code. If it helps one person, it probably will help many so here it is....

--------- Demo Removing Duplicate rows in a DataTable ----------------

using System;

using System.Data;

using System.Collections;

 

namespace Demo_DataTableRemoveDupRows

{

      class Class1

      {

            [STAThread]

            static void Main(string[] args)

            {

 

                  // create an example datatable with duplicate rows

                  DataTable tbl = new DataTable();

 

                  tbl.Columns.Add("ColumnA");

                  tbl.Columns.Add("ColumnB");

                  tbl.Columns.Add("ColumnC");

                  for(int i = 0; i<10; i++)

                  {

                        DataRow nr = tbl.NewRow();

                        nr["ColumnA"] = "A" + i.ToString();

                        nr["ColumnB"] = "B" + i.ToString();

                        nr["ColumnC"] = "C" + i.ToString();

                        tbl.Rows.Add(nr);

                        // duplicate

                        nr = tbl.NewRow();

                        nr["ColumnA"] = "A" + i.ToString();

                        nr["ColumnB"] = "B" + i.ToString();

                        nr["ColumnC"] = "C" + i.ToString();

                        tbl.Rows.Add(nr);

                  }

 

                  PrintRows(tbl); // show table with duplicates

 

                  //Create an array of DataColumns to compare

                  //If these columns all match we consider the

                  //rows duplicate.

                  DataColumn[] keyColumns =

                              new DataColumn[]{tbl.Columns["ColumnA"],

                                               tbl.Columns["ColumnA"]};

 

                  //remove the duplicates

                  RemoveDuplicates(tbl, keyColumns);

 

 

                  PrintRows(tbl); // show table again

                  Console.ReadLine(); // pause to view output

 

            }

           

            /// <summary>

            /// Removes duplicate rows from given DataTable

            /// </summary>

            /// <param name="tbl">Table to scan for duplicate rows</param>

            /// <param name="KeyColumns">An array of DataColumns

            ///   containing the columns to match for duplicates</param>

            private static void RemoveDuplicates(DataTable tbl,

                                                 DataColumn[] keyColumns)

            {

                  int rowNdx = 0;

                  while(rowNdx < tbl.Rows.Count-1)

                  {

                        DataRow[] dups = FindDups(tbl, rowNdx, keyColumns);

                        if(dups.Length>0)

                        {

                              foreach(DataRow dup in dups)

                              {

                                    tbl.Rows.Remove(dup);

                              }

                        }

                        else

                        {

                              rowNdx++;

                        }

                  }

            }

 

            private static DataRow[] FindDups(DataTable tbl,

                                              int sourceNdx,

                                              DataColumn[] keyColumns)

            {

                  ArrayList retVal = new ArrayList();

 

                  DataRow sourceRow = tbl.Rows[sourceNdx];

                  for(int i=sourceNdx + 1; i<tbl.Rows.Count; i++)

                  {

                        DataRow targetRow = tbl.Rows[i];

                        if(IsDup(sourceRow, targetRow, keyColumns))

                        {

                              retVal.Add(targetRow);

                        }

                  }

                  return (DataRow[]) retVal.ToArray(typeof(DataRow));

            }

 

            private static bool IsDup(DataRow sourceRow,

                                      DataRow targetRow,

                                      DataColumn[] keyColumns)

            {

                  bool retVal = true;

                  foreach(DataColumn column in keyColumns)

                  {

                        retVal = retVal && sourceRow[column].Equals(targetRow[column]);

                        if(!retVal) break;

                  }

                  return retVal;

            }

 

            private static void PrintRows(DataTable tbl)

            {

                  for(int i=0; i<tbl.Rows.Count; i++)

                  {

                        Console.WriteLine("row: {0}, ColumnA: {1}, ColumnB: {2}", i, tbl.Rows[i]["ColumnA"], tbl.Rows[i]["ColumnB"]);

                  }

 

            }

      }

}

--------- End Demo Code ----------------

Hope it helps someone....

-Andy

Print | posted on Thursday, June 24, 2004 10:52 AM

Comments on this post

# re: How to remove duplicate rows from a DataTable....

Requesting Gravatar...
Thanks for this post. I'm moving a large amount of data to an in-memory cache. I was doing a SELECT DISTINCT before to remove the duplicates. This works great.
Left by Brian Scott on Aug 16, 2004 2:02 PM

# re: How to remove duplicate rows from a DataTable....

Requesting Gravatar...
Thankyou very much. been trying to do this for about 6 hours. started to feel like i was loosing my mind in double vb.net loops but your soloution does the trick perfectly.

cheers
Left by mark a on Feb 04, 2005 9:24 AM

# re: How to remove duplicate rows from a DataTable....

Requesting Gravatar...
Hi this code you posted has helped me solve a problem I had which I had also spent ages looking at. Many thanks

Gregor Suttie
Left by Gregor Suttie on May 02, 2005 7:22 AM

# re: How to remove duplicate rows from a DataTable....

Requesting Gravatar...
Fantastic! Spent hours trying to get this to work properly. Thanks a bunch!
Left by Joe P. on May 02, 2005 8:59 AM

# re: How to remove duplicate rows from a DataTable....

Requesting Gravatar...
Awesome post , you saved my life :)
Left by RonnyLiwuh on May 10, 2005 6:55 AM

# re: How to remove duplicate rows from a DataTable....

Requesting Gravatar...
Fantastic! I have Spent hours trying to get this to work properly. Thanks a lot
Left by Naveen Chainani on Jan 05, 2006 2:17 AM

# re: How to remove duplicate rows from a DataTable....

Requesting Gravatar...
Dear Mr.Programmer,I'm so thanksfull about posted code for C# programmer in metter of removing duplicate rows fron DataTable.
Left by Srdjan Markovic on Mar 09, 2006 11:11 PM

# re: How to remove duplicate rows from a DataTable....

Requesting Gravatar...
Nice code, THanks a lot!!!!!!!
Left by Rajesh on Mar 12, 2006 8:15 PM

# re: How to remove duplicate rows from a DataTable....

Requesting Gravatar...
Nice code, but 5 minutes? LOL!
Left by Bill Clinton on May 25, 2006 11:26 AM

# re: How to remove duplicate rows from a DataTable....

Requesting Gravatar...
How do you compare more than one column at a time?

I need to leave the row with most current date.

Here's the data:

\Product\Help\help.chm 06/23/2006 09:48 AM
\Product\Help\help.chm 08/23/2006 07:16 PM
\Product\Help\help.html 06/25/2006 08:31 AM
\Product\Help\help.html 08/25/2006 09:05 AM

( select name = name & Date > Date )
Left by Kevin on Aug 17, 2006 6:21 AM

# re: How to remove duplicate rows from a DataTable....

Requesting Gravatar...
Thank you very much. This is a big big help!
Left by Indera on Oct 25, 2006 6:01 AM

# re: How to remove duplicate rows from a DataTable....

Requesting Gravatar...
very good. But it's better if you have vb.net code :D
Thank you
Left by hieu on Dec 06, 2006 3:14 AM

# re: How to remove duplicate rows from a DataTable....

Requesting Gravatar...
Hi there,
You did a great job. I was looking for something similar.(I just wanted to create the list of duplicates and not remove them) I also had to rewrite the code in vb.net which was a breeze. I appreciate your putting on the web.
Saved me a lot of time.
thanks again

samir
Left by Samir on Dec 10, 2006 12:06 PM

# re: How to remove duplicate rows from a DataTable....

Requesting Gravatar...
Thank you.
Left by Malay Thakershi on Feb 08, 2007 12:00 PM

# re: How to remove duplicate rows from a DataTable....

Requesting Gravatar...
Ok. Thanks for the post, but here's a method that runs about 100 times faster and uses much less code. It employes a Dictionary for a uniqueness test. Just replace the RemoveDuplicates Method from the example above with this one, and create a list of column names by adding them to a List<string> generic collection to feed into the method. For Example,

List<string> keyColumns = new List<string>();
keyColumns.Add("ColumnA");
keyColumns.Add("ColumnB");

RemoveDuplicates(tbl,keyColumns);

private void RemoveDuplicates(DataTable table, List<string> keyColumns)
{
Dictionary<string, string> uniquenessDict = new Dictionary<string, string>(table.Rows.Count);
StringBuilder sb = null;
int rowIndex = 0;
DataRow row;
DataRowCollection rows = table.Rows;
while (rowIndex < rows.Count - 1)
{
row = rows[rowIndex];
sb = new StringBuilder();
foreach (string colname in keyColumns)
{
sb.Append(((string)row[colname]));
}

if (uniquenessDict.ContainsKey(sb.ToString()))
{
rows.Remove(row);
}
else
{
uniquenessDict.Add(sb.ToString(), string.Empty);
rowIndex++;
}
}
Left by Kevin Morris on Apr 07, 2007 2:29 PM

# re: How to remove duplicate rows from a DataTable....

Requesting Gravatar...
How do you remove triplicates?
Left by Norm on May 10, 2007 8:31 AM

# re: How to remove duplicate rows from a DataTable....

Requesting Gravatar...
Kevin, thaks a lot. Your code rocks.

Really fast, short, simple and very flexible.

That's the way it should be!

Thank you!
Left by Dan on May 18, 2007 2:16 PM

# re: How to remove duplicate rows from a DataTable....

Requesting Gravatar...
wow, amazing, this is what i need, i thought of several different ways but i think yours is faster.

Thank you (^_^)v
Left by silenr0c on Jul 06, 2008 9:32 PM

# re: How to remove duplicate rows from a DataTable....

Requesting Gravatar...
by the way, i made major modification to your idea, because i don't need to search every columns, i'll write in my blog later, please come by (^_^)
Left by silenr0c on Jul 07, 2008 2:05 AM

# re: How to remove duplicate rows from a DataTable....

Requesting Gravatar...
thanks kevin and Andy,good job, too fast...
Left by desper on Jul 24, 2008 12:08 PM

# re: How to remove duplicate rows from a DataTable....

Requesting Gravatar...
Thanks Andy for your effort an welling to help us :)
but I think this code is removing duplicates only if they exist sequently , like 1-2, 3-4 but not 1-9 !

Thanks Kevin I used your code :)
Left by Iman on Oct 21, 2008 2:52 AM

# re: How to remove duplicate rows from a DataTable....

Requesting Gravatar...
GREAT JOB!
Left by Jelo on Dec 01, 2008 7:50 PM

# re: How to remove duplicate rows from a DataTable....

Requesting Gravatar...
Here is the best and most efficient way to accomplish what you want.

DataTable tbl;

tbl = tbl.DefaultView.ToTable(true,"columnname");
Left by Dave Edwards on Jan 08, 2009 9:45 AM

# re: How to remove duplicate rows from a DataTable....

Requesting Gravatar...
Thanks! This is what i needed and it's very useful and fast.
Left by Thiago Seffrin on Jan 22, 2009 3:07 AM

# re: How to remove duplicate rows from a DataTable....

Requesting Gravatar...
No need of all this code..u can just use this single line of code

dt.DefaultView.ToTable( true, "employeeid");

where dt is DataTable object

First option in ToTable is a boolean which indicates, you want distinct rows or not?

Second option in the ToTable is the column name based on which we have to select distinct rows.
Left by pradeep on Feb 09, 2009 12:34 AM

# re: How to remove duplicate rows from a DataTable....

Requesting Gravatar...
OR YOU CAN USE THIS ALSO...ITS VERY SIMPLE

Hashtable hshTblTemp = new Hashtable();
ArrayList arryListDuplicates = new ArrayList(); // contains the duplicate rows

foreach (DataRow dr in dtTable.Rows)
{
if (hshTblTemp.Contains(dr[colName]))
arryListDuplicates.Add(dr);
else
hshTblTemp.Add(dr[colName], dr["ID"].ToString());
}

foreach (DataRow dRow in arryListDuplicates)
dtTable.Rows.Remove(dRow);
Left by pradeep on Feb 09, 2009 12:36 AM

# re: How to remove duplicate rows from a DataTable....

Requesting Gravatar...
Nice piece of code, helped me a lot.
Thanks for the effort.
Left by Sikunj on Mar 09, 2009 4:16 AM

# re: How to remove duplicate rows from a DataTable....

Requesting Gravatar...
Hey Andy. Your really help m ea lot, may the god grace up on you. thanks
Left by Ashraf on Apr 23, 2009 3:12 PM

# re: How to remove duplicate rows from a DataTable....

Requesting Gravatar...
you're life saver, thanks a lot!!!
Left by ozzy on Jun 18, 2009 6:49 AM

# re: How to remove duplicate rows from a DataTable....

Requesting Gravatar...
Perfect man, all the codes are working fine.

Cheers everyone!
Left by Amir on Aug 28, 2009 3:43 AM

# re: How to remove duplicate rows from a DataTable....

Requesting Gravatar...
This code is very useful.

Thank you.
Left by Akbar Moradi on Sep 22, 2009 10:39 AM

# Yo Everybody!

Requesting Gravatar...
Hi!
New here and first post.
I am man, 26 years old from Spain
I enjoyed here.

Botijos
Left by Beeniashems on Oct 29, 2009 6:41 AM

Your comment:

 (will show your gravatar)