Job Search

Saturday, March 31, 2018

Delete duplicate rows from Oracle Database tables

Use subquery to delete duplicate rows

SELECT *
  FROM employees a
 WHERE ROWID > (SELECT MIN(ROWID)
                  FROM employees b
                 WHERE b.first_name = a.first_name
                   AND b.last_name = a.last_name);

SELECT *
  FROM employees a
 WHERE a.rowid > ANY (SELECT b.rowid
          FROM employees b
         WHERE a.first_name = b.first_name
           AND a.last_name = b.last_name);

DELETE FROM employees a
 WHERE a.rowid > ANY (SELECT b.rowid
          FROM employees b
         WHERE a.first_name = b.first_name
           AND a.last_name = b.last_name);


Use RANK to delete duplicate rows

SELECT *
  FROM employees
 WHERE ROWID IN (SELECT "rowid"
                   FROM (SELECT "rowid", rank_n
                           FROM (SELECT first_name,
                                        last_name,
                                        rank() over(PARTITION BY first_name, last_name ORDER BY ROWID) rank_n,
                                        ROWID AS "rowid"
                                   FROM employees
                                  WHERE (first_name, last_name) IN
                                        (SELECT first_name, last_name
                                           FROM employees
                                          GROUP BY first_name, last_name
                                         HAVING COUNT(*) > 1)))
                  WHERE rank_n > 1);

DELETE FROM employees
 WHERE ROWID IN (SELECT "rowid"
                   FROM (SELECT "rowid", rank_n
                           FROM (SELECT first_name,
                                        last_name,
                                        rank() over(PARTITION BY first_name, last_name ORDER BY ROWID) rank_n,
                                        ROWID AS "rowid"
                                   FROM employees
                                  WHERE (first_name, last_name) IN
                                        (SELECT first_name, last_name
                                           FROM employees
                                          GROUP BY first_name, last_name
                                         HAVING COUNT(*) > 1)))
                  WHERE rank_n > 1);

Use analytics to delete duplicate rows

SELECT *
  FROM employees
 WHERE ROWID IN (SELECT ROWID
                   FROM (SELECT ROWID,
                                row_number() over(PARTITION BY first_name, last_name ORDER BY first_name, last_name) dup
                           FROM employees)
                  WHERE dup > 1);

DELETE FROM employees
 WHERE ROWID IN (SELECT ROWID
                   FROM (SELECT ROWID,
                                row_number() over(PARTITION BY first_name, last_name ORDER BY first_name, last_name) dup
                           FROM employees)
                  WHERE dup > 1);

Removing duplicate table rows where rows have NULL values

SELECT *
  FROM employees a
 WHERE a.rowid > ANY
 (SELECT b.rowid
          FROM employees b
         WHERE (a.first_name = b.first_name OR
               (a.first_name IS NULL AND b.first_name IS NULL))
           AND (a.manager_id = b.manager_id OR
               (a.manager_id IS NULL AND b.manager_id IS NULL)));

DELETE FROM employees a
 WHERE a.rowid > ANY
 (SELECT b.rowid
          FROM employees b
         WHERE (a.first_name = b.first_name OR
               (a.first_name IS NULL AND b.first_name IS NULL))
           AND (a.manager_id = b.manager_id OR
               (a.manager_id IS NULL AND b.manager_id IS NULL)));


I hope you all have enjoyed reading this article. Comments are welcome....