remove duplicate rows in csv file python

# credit to the Stack Overflow user in the source link

with open('file_with_duplicates.csv', 'r') as in_file, open('output.csv', 'w') as out_file:

    seen = set()  # set of lines already written, for fast O(1) amortized lookup

    for line in in_file:
        if line in seen:
            continue  # skip duplicate line

        seen.add(line)
        out_file.write(line)
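
If pandas is already part of your workflow, DataFrame.drop_duplicates gives a similar result. This is a separate sketch, not from the original answer; it assumes the CSV has a header row and fits in memory, and it reuses the filenames from the snippet above for illustration.

import pandas as pd

# Read the whole CSV, drop fully identical rows, and write the result back out.
df = pd.read_csv('file_with_duplicates.csv')
df.drop_duplicates().to_csv('output.csv', index=False)

Note that pandas compares parsed field values rather than raw text, so rows that differ only in formatting (for example "1.0" vs "1.00") may be treated as duplicates here but kept by the line-based version above.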
