SQL
Interview Questions - 50 Comprehensive Q&A
Basic to Advanced SQL
Concepts
1. Explain
Different Types of Joins with Examples
Tables Setup:
Table 1 (Employees): Table 2 (Departments):
EMP_ID | NAME DEPT_ID | DEPT_NAME
1
| John 1
| HR
2
| Jane 2
| IT
3
| Mike 3
| Finance
4
| Sarah NULL
| NULL
Inner Join:
SELECT e.NAME, d.DEPT_NAME
FROM Employees e
INNER JOIN Departments d ON e.EMP_ID = d.DEPT_ID;
-- Result: Only
matching records (John-HR, Jane-IT)
Left Join:
SELECT e.NAME, d.DEPT_NAME
FROM Employees e
LEFT JOIN Departments d ON e.EMP_ID = d.DEPT_ID;
-- Result: All
employees + matching departments (includes Mike, Sarah with NULL)
Right Join:
SELECT e.NAME, d.DEPT_NAME
FROM Employees e
RIGHT JOIN Departments d ON e.EMP_ID = d.DEPT_ID;
-- Result: All
departments + matching employees (includes Finance with NULL)
Full Outer Join:
SELECT e.NAME, d.DEPT_NAME
FROM Employees e
FULL OUTER JOIN Departments d ON e.EMP_ID = d.DEPT_ID;
-- Result: All
records from both tables
2. Find 2nd,
3rd, Nth Highest Salary
Method 1 - Using
ROW_NUMBER():
-- 2nd Highest Salary
SELECT salary
FROM (
SELECT salary, ROW_NUMBER() OVER (ORDER BY salary DESC) as rn
FROM employees
) WHERE rn = 2;
-- Nth Highest
Salary (replace N with desired number)
SELECT salary
FROM (
SELECT salary, ROW_NUMBER() OVER (ORDER BY salary DESC) as rn
FROM employees
) WHERE rn = N;
Method 2 - Using
DENSE_RANK() (handles duplicates):
SELECT salary
FROM (
SELECT salary, DENSE_RANK() OVER (ORDER BY salary DESC) as rank
FROM employees
) WHERE rank = 2;
3. Pivot Table -
Convert Rows to Columns
Input Table
(Student Marks):
CREATE TABLE student_marks (
ID VARCHAR(2),
Name VARCHAR(20),
Subject VARCHAR(10),
Mark INT
);
INSERT INTO student_marks VALUES
('01', 'Ishara', 'Maths', 40),
('01', 'Ishara', 'Science', 60),
('02', 'Tharaka', 'Maths', 60),
('03', 'Rukshan', 'Maths', 20),
('03', 'Rukshan', 'Science', 70);
Solution - Pivot
using CASE:
SELECT
ID,
Name,
MAX(CASE WHEN Subject = 'Maths' THEN Mark END) as Maths_Mark,
MAX(CASE WHEN Subject = 'Science' THEN Mark END) as Science_Mark
FROM student_marks
GROUP BY ID, Name
ORDER BY ID;
Result:
ID | Name | Maths_Mark | Science_Mark
01 | Ishara | 40
| 60
02 | Tharaka | 60 | NULL
03 | Rukshan | 20 | 70
4. Generate All
Team Match Combinations
Input Table
(Teams):
CREATE TABLE teams (
id INT,
team_name VARCHAR(20)
);
INSERT INTO teams VALUES
(1, 'India'),
(2, 'Pakistan'),
(3, 'Australia'),
(4, 'England');
Solution - Self
Join:
SELECT
t1.team_name + ' vs ' + t2.team_name as Match_Combination
FROM teams t1
CROSS JOIN teams t2
WHERE t1.id < t2.id
ORDER BY t1.id, t2.id;
Result:
India vs Pakistan
India vs Australia
India vs England
Pakistan vs Australia
Pakistan vs England
Australia vs England
5. Handle NULL
Values in Comparisons
Input Table:
CREATE TABLE sample_data (
A INT,
B INT
);
INSERT INTO sample_data VALUES
(1, 1),
(1, 1),
(2, 1),
(2, 2),
(3, NULL),
(NULL, NULL),
(NULL, NULL);
Find rows where A
= B (including NULLs):
SELECT * FROM sample_data
WHERE (A = B) OR (A IS NULL AND B IS NULL);
Find rows where A
≠ B (excluding NULLs):
SELECT * FROM sample_data
WHERE A != B AND A IS NOT NULL AND B IS NOT NULL;
6. Find
Duplicate Records
Method 1 - Using
Window Functions:
SELECT *
FROM (
SELECT *, ROW_NUMBER() OVER (PARTITION BY emp_id ORDER BY created_date DESC) as rn
FROM employees
) WHERE rn > 1;
Method 2 - Using
GROUP BY:
SELECT emp_id, COUNT(*) as duplicate_count
FROM employees
GROUP BY emp_id
HAVING COUNT(*) > 1;
7. Delete
Duplicate Records (Keep Latest)
DELETE FROM employees
WHERE id NOT IN (
SELECT MIN(id)
FROM employees
GROUP BY emp_id, emp_name
);
8. Running
Totals and Moving Averages
Running Total:
SELECT
sale_date,
amount,
SUM(amount) OVER (ORDER BY sale_date ROWS UNBOUNDED PRECEDING) as running_total
FROM sales
ORDER BY sale_date;
Moving Average
(3-day):
SELECT
sale_date,
amount,
AVG(amount) OVER (ORDER BY sale_date ROWS 2 PRECEDING) as moving_avg_3day
FROM sales;
9. Employees
Earning More Than Department Average
SELECT e1.emp_name, e1.salary, e1.department
FROM employees e1
WHERE e1.salary > (
SELECT AVG(e2.salary)
FROM employees e2
WHERE e2.department = e1.department
);
10. Find
Employees with No Manager
SELECT e1.*
FROM employees e1
LEFT JOIN employees e2 ON e1.manager_id = e2.emp_id
WHERE e2.emp_id IS NULL AND e1.manager_id IS NOT NULL;
11. Hierarchical
Data - Employee Reporting Chain
Using Recursive
CTE:
WITH RECURSIVE emp_hierarchy AS (
--
Anchor: Top-level managers
SELECT emp_id, emp_name, manager_id, 1 as level,
CAST(emp_name AS VARCHAR(1000)) as hierarchy_path
FROM employees
WHERE manager_id IS NULL
UNION ALL
--
Recursive: Subordinates
SELECT e.emp_id, e.emp_name, e.manager_id, eh.level + 1,
CAST(eh.hierarchy_path + ' -> ' + e.emp_name AS VARCHAR(1000))
FROM employees e
INNER JOIN emp_hierarchy eh ON e.manager_id = eh.emp_id
)
SELECT * FROM emp_hierarchy ORDER BY level, emp_name;
12. Calculate
Age from Birth Date
SELECT
emp_name,
birth_date,
DATEDIFF(YEAR, birth_date, GETDATE()) as age_years,
DATEDIFF(MONTH, birth_date, GETDATE()) as age_months,
DATEDIFF(DAY, birth_date, GETDATE()) as age_days
FROM employees;
13. First and
Last Day of Month
SELECT
DATEPART(YEAR, GETDATE()) as current_year,
DATEPART(MONTH, GETDATE()) as current_month,
DATEFROMPARTS(YEAR(GETDATE()), MONTH(GETDATE()), 1) as first_day_month,
EOMONTH(GETDATE()) as last_day_month,
DATEADD(MONTH, -1, GETDATE()) as same_day_last_month;
14. Advanced
Window Functions
Rank, Dense_Rank,
Row_Number Comparison:
SELECT
emp_name,
salary,
RANK() OVER (ORDER BY salary DESC) as rank_val,
DENSE_RANK() OVER (ORDER BY salary DESC) as dense_rank_val,
ROW_NUMBER() OVER (ORDER BY salary DESC) as row_num,
LAG(salary) OVER (ORDER BY salary DESC) as prev_salary,
LEAD(salary) OVER (ORDER BY salary DESC) as next_salary
FROM employees;
15. Percentage
of Total
SELECT
department,
SUM(salary) as dept_total,
ROUND(SUM(salary) * 100.0 / (SELECT SUM(salary) FROM employees), 2) as percentage_of_total
FROM employees
GROUP BY department;
16. CASE
Statement with Multiple Conditions
SELECT
emp_name,
salary,
CASE
WHEN salary >= 100000 THEN 'Senior Level'
WHEN salary >= 70000 AND department = 'IT' THEN 'Mid-Senior IT'
WHEN salary >= 50000 THEN 'Mid Level'
WHEN salary >= 30000 THEN 'Junior Level'
ELSE 'Entry Level'
END as salary_grade
FROM employees;
17. Find Records
with Maximum Value per Group
SELECT e1.*
FROM employees e1
INNER JOIN (
SELECT department, MAX(salary) as max_salary
FROM employees
GROUP BY department
) e2 ON e1.department = e2.department AND e1.salary = e2.max_salary;
18. Common Table
Expressions (CTE) for Complex Queries
WITH dept_stats AS (
SELECT
department,
AVG(salary) as avg_salary,
COUNT(*) as emp_count
FROM employees
GROUP BY department
),
high_performing_depts AS (
SELECT department
FROM dept_stats
WHERE avg_salary > 60000 AND emp_count > 5
)
SELECT e.*
FROM employees e
INNER JOIN high_performing_depts hpd ON e.department = hpd.department;
19. String
Manipulation and Pattern Matching
-- Extract domain from
email
SELECT
emp_name,
email,
SUBSTRING(email, CHARINDEX('@', email) + 1, LEN(email)) as email_domain,
CASE
WHEN email LIKE '%@company.com' THEN 'Internal'
ELSE 'External'
END as email_type
FROM employees
WHERE email IS NOT NULL;
20. Date Range
Queries
-- Employees hired in last
6 months
SELECT *
FROM employees
WHERE hire_date >= DATEADD(MONTH, -6, GETDATE());
-- Employees hired
between two dates
SELECT *
FROM employees
WHERE hire_date BETWEEN '2023-01-01' AND '2023-12-31';
21. UNION vs
UNION ALL
-- UNION (removes
duplicates)
SELECT emp_name FROM employees WHERE department = 'IT'
UNION
SELECT emp_name FROM employees WHERE salary > 50000;
-- UNION ALL
(keeps duplicates)
SELECT emp_name FROM employees WHERE department = 'IT'
UNION ALL
SELECT emp_name FROM employees WHERE salary > 50000;
22. Subqueries
vs EXISTS
-- Using IN (subquery)
SELECT * FROM employees
WHERE department_id IN (
SELECT id FROM departments WHERE location = 'New York'
);
-- Using EXISTS
(often more efficient)
SELECT * FROM employees e
WHERE EXISTS (
SELECT 1 FROM departments d
WHERE d.id = e.department_id AND d.location = 'New York'
);
23. Self Join -
Find Employees in Same Department
SELECT
e1.emp_name as employee1,
e2.emp_name as employee2,
e1.department
FROM employees e1
INNER JOIN employees e2 ON e1.department = e2.department
WHERE e1.emp_id < e2.emp_id;
24. Calculate
Working Days Between Dates
SELECT
emp_name,
hire_date,
GETDATE() as current_date,
DATEDIFF(DAY, hire_date, GETDATE()) -
(DATEDIFF(WEEK, hire_date, GETDATE()) * 2) -
CASE WHEN DATENAME(WEEKDAY, hire_date) = 'Sunday' THEN 1 ELSE 0 END -
CASE WHEN DATENAME(WEEKDAY, GETDATE()) = 'Saturday' THEN 1 ELSE 0 END
as working_days
FROM employees;
25. Handle
Division by Zero
SELECT
department,
total_budget,
emp_count,
CASE
WHEN emp_count = 0 THEN 0
ELSE total_budget / emp_count
END as budget_per_employee
FROM department_budget;
26. Top N
Records per Group
-- Top 2 highest paid
employees per department
SELECT *
FROM (
SELECT *,
ROW_NUMBER() OVER (PARTITION BY department ORDER BY salary DESC) as rn
FROM employees
) ranked
WHERE rn <= 2;
27. UPDATE with
JOIN
UPDATE e
SET e.department_name = d.dept_name
FROM employees e
INNER JOIN departments d ON e.department_id = d.id
WHERE e.department_name IS NULL;
28. Conditional
Aggregation
SELECT
department,
COUNT(*) as total_employees,
COUNT(CASE WHEN salary > 50000 THEN 1 END) as high_earners,
AVG(CASE WHEN gender = 'M' THEN salary END) as avg_male_salary,
AVG(CASE WHEN gender = 'F' THEN salary END) as avg_female_salary
FROM employees
GROUP BY department;
29. Find Gaps in
Sequential Data
-- Find missing IDs in
sequence
WITH number_sequence AS (
SELECT 1 as num
UNION ALL
SELECT num + 1
FROM number_sequence
WHERE num < (SELECT MAX(emp_id) FROM employees)
)
SELECT ns.num as missing_id
FROM number_sequence ns
LEFT JOIN employees e ON ns.num = e.emp_id
WHERE e.emp_id IS NULL;
30. Calculate
Median
-- Median salary
SELECT
AVG(salary) as median_salary
FROM (
SELECT salary,
ROW_NUMBER() OVER (ORDER BY salary) as row_asc,
ROW_NUMBER() OVER (ORDER BY salary DESC) as row_desc
FROM employees
) t
WHERE row_asc IN (row_desc, row_desc - 1, row_desc + 1);
31. Data
Validation Queries
-- Check data quality
SELECT
'Missing Names' as issue,
COUNT(*) as count
FROM employees
WHERE emp_name IS NULL OR emp_name = ''
UNION ALL
SELECT
'Negative Salaries',
COUNT(*)
FROM employees
WHERE salary < 0
UNION ALL
SELECT
'Future Hire Dates',
COUNT(*)
FROM employees
WHERE hire_date > GETDATE();
32. Performance
Query - Index Usage
-- Check for queries that
might need indexes
SELECT
query_text,
execution_count,
total_worker_time,
avg_worker_time = total_worker_time / execution_count
FROM sys.dm_exec_query_stats qs
CROSS APPLY sys.dm_exec_sql_text(qs.sql_handle)
st
ORDER BY avg_worker_time DESC;
33. Dynamic
Pivot (Variable Columns)
DECLARE @cols NVARCHAR(MAX), @query NVARCHAR(MAX)
SELECT @cols = STUFF((
SELECT DISTINCT ',' + QUOTENAME(Subject)
FROM student_marks
FOR XML PATH(''), TYPE).value('.', 'NVARCHAR(MAX)'), 1, 1, '')
SET @query = '
SELECT ID, Name, ' + @cols + '
FROM (
SELECT ID, Name, Subject, Mark
FROM student_marks
) src
PIVOT (
MAX(Mark) FOR Subject IN (' + @cols + ')
) piv'
EXEC sp_executesql @query
34. Calculate
Business Days
CREATE FUNCTION dbo.GetBusinessDays(@StartDate DATE, @EndDate DATE)
RETURNS INT
AS
BEGIN
RETURN (
DATEDIFF(DAY, @StartDate, @EndDate) -
(DATEDIFF(WEEK, @StartDate, @EndDate) * 2) -
CASE WHEN DATENAME(WEEKDAY, @StartDate) = 'Sunday' THEN 1 ELSE 0 END -
CASE WHEN DATENAME(WEEKDAY, @EndDate) = 'Saturday' THEN 1 ELSE 0 END
)
END
35. Row-Level
Security Example
-- Create policy for
row-level security
CREATE SECURITY POLICY emp_security_policy
ADD FILTER PREDICATE
dbo.fn_securitypredicate(department) ON dbo.employees
WITH (STATE = ON);
36. JSON Data
Handling
-- Extract data from JSON
column
SELECT
emp_id,
JSON_VALUE(additional_info, '$.phone') as phone,
JSON_VALUE(additional_info, '$.address.city') as city
FROM employees
WHERE ISJSON(additional_info) = 1;
37. Temporal
Tables (SQL Server)
-- Query historical data
SELECT *
FROM employees
FOR SYSTEM_TIME AS OF '2023-01-01'
WHERE emp_id = 123;
38. Advanced
Error Handling
BEGIN TRY
BEGIN TRANSACTION
UPDATE employees SET salary = salary * 1.1 WHERE department = 'IT';
IF @@ROWCOUNT = 0
THROW 50001, 'No IT employees found to update', 1;
COMMIT TRANSACTION
END TRY
BEGIN CATCH
ROLLBACK TRANSACTION
SELECT
ERROR_NUMBER() as ErrorNumber,
ERROR_MESSAGE() as ErrorMessage,
ERROR_PROCEDURE() as ErrorProcedure
END CATCH
39. Data Masking
for Sensitive Information
SELECT
emp_id,
LEFT(emp_name, 1) + REPLICATE('*', LEN(emp_name) - 2) + RIGHT(emp_name, 1) as masked_name,
LEFT(email, 2) + REPLICATE('*', CHARINDEX('@', email) - 3) + RIGHT(email, LEN(email) - CHARINDEX('@', email) + 1) as masked_email
FROM employees;
40. Generate
Sequential Numbers
-- Generate numbers 1 to
100
WITH numbers AS (
SELECT 1 as n
UNION ALL
SELECT n + 1
FROM numbers
WHERE n < 100
)
SELECT n FROM numbers;
41. Calculate
Cumulative Percentage
SELECT
emp_name,
salary,
SUM(salary) OVER (ORDER BY salary DESC ROWS UNBOUNDED PRECEDING) as cumulative_salary,
ROUND(
SUM(salary) OVER (ORDER BY salary DESC ROWS UNBOUNDED PRECEDING) * 100.0 /
SUM(salary) OVER (), 2
) as cumulative_percentage
FROM employees;
42. Find
Overlapping Date Ranges
SELECT
p1.project_name as project1,
p2.project_name as project2,
p1.start_date,
p1.end_date
FROM projects p1
INNER JOIN projects p2 ON p1.project_id < p2.project_id
WHERE p1.start_date <= p2.end_date AND p1.end_date >= p2.start_date;
43. Calculate
Age in Years, Months, Days
SELECT
emp_name,
birth_date,
DATEDIFF(YEAR, birth_date, GETDATE()) -
CASE
WHEN MONTH(birth_date) > MONTH(GETDATE()) OR
(MONTH(birth_date) = MONTH(GETDATE()) AND DAY(birth_date) > DAY(GETDATE()))
THEN 1
ELSE 0
END as age_years
FROM employees;
44. Conditional
JOIN
SELECT
e.emp_name,
CASE
WHEN e.level = 'Senior' THEN sm.manager_name
ELSE jm.manager_name
END as manager_name
FROM employees e
LEFT JOIN senior_managers sm ON e.manager_id = sm.manager_id AND e.level = 'Senior'
LEFT JOIN junior_managers jm ON e.manager_id = jm.manager_id AND e.level != 'Senior';
45. Find
Patterns in Data
-- Find employees with
salary increases > 20% year over year
SELECT
emp_id,
review_year,
salary,
LAG(salary) OVER (PARTITION BY emp_id ORDER BY review_year) as prev_salary,
CASE
WHEN LAG(salary) OVER (PARTITION BY emp_id ORDER BY review_year) > 0
THEN ROUND(((salary - LAG(salary) OVER (PARTITION BY emp_id ORDER BY review_year)) * 100.0 /
LAG(salary) OVER (PARTITION BY emp_id ORDER BY review_year)), 2)
END as salary_increase_percentage
FROM salary_history
WHERE salary > LAG(salary) OVER (PARTITION BY emp_id ORDER BY review_year) * 1.2;
46. Advanced
GROUP BY with HAVING
-- Departments with salary
variance > threshold
SELECT
department,
COUNT(*) as emp_count,
AVG(salary) as avg_salary,
STDEV(salary) as salary_std_dev,
VAR(salary) as salary_variance
FROM employees
GROUP BY department
HAVING COUNT(*) >= 3
AND STDEV(salary) > 10000
ORDER BY salary_variance DESC;
47. Dynamic SQL
for Flexible Queries
DECLARE @department VARCHAR(50) = 'IT'
DECLARE @min_salary INT = 50000
DECLARE @sql NVARCHAR(MAX)
SET @sql = '
SELECT * FROM employees
WHERE 1=1'
IF @department IS NOT NULL
SET @sql = @sql + ' AND department = @dept'
IF @min_salary IS NOT NULL
SET @sql = @sql + ' AND salary >= @min_sal'
EXEC sp_executesql @sql,
N'@dept VARCHAR(50), @min_sal INT',
@dept = @department,
@min_sal = @min_salary
48. Handling
Multiple Delimiters in Data
-- Split comma-separated
skills into rows
WITH split_skills AS (
SELECT
emp_id,
TRIM(value) as skill
FROM employees
CROSS APPLY STRING_SPLIT(skills, ',')
WHERE skills IS NOT NULL
)
SELECT
skill,
COUNT(*) as employee_count
FROM split_skills
GROUP BY skill
ORDER BY employee_count DESC;
49. Calculate
Retention Rate
SELECT
hire_year,
total_hired,
still_employed,
ROUND((still_employed * 100.0 / total_hired), 2) as retention_rate
FROM (
SELECT
YEAR(hire_date) as hire_year,
COUNT(*) as total_hired,
SUM(CASE WHEN termination_date IS NULL THEN 1 ELSE 0 END) as still_employed
FROM employees
GROUP BY YEAR(hire_date)
) retention_stats
ORDER BY hire_year;
50. Complex
Performance Analysis Query
-- Employee performance
ranking with multiple criteria
WITH performance_metrics AS (
SELECT
e.emp_id,
e.emp_name,
e.department,
e.salary,
COALESCE(p.performance_score, 0) as performance_score,
COALESCE(a.attendance_percentage, 0) as attendance,
DATEDIFF(MONTH, e.hire_date, GETDATE()) as tenure_months
FROM employees e
LEFT JOIN performance_reviews p ON e.emp_id = p.emp_id
AND p.review_year = YEAR(GETDATE())
LEFT JOIN attendance_records a ON e.emp_id = a.emp_id
AND a.year = YEAR(GETDATE())
),
weighted_scores AS (
SELECT *,
(performance_score * 0.4 +
attendance * 0.3 +
CASE WHEN tenure_months > 12 THEN 100 ELSE tenure_months * 8.33 END * 0.3) as overall_score
FROM performance_metrics
)
SELECT
emp_name,
department,
performance_score,
attendance,
tenure_months,
ROUND(overall_score, 2) as overall_score,
RANK() OVER (ORDER BY overall_score DESC) as company_rank,
RANK() OVER (PARTITION BY department ORDER BY overall_score DESC) as dept_rank
FROM weighted_scores
ORDER BY overall_score DESC;
Performance Tips
1.
Use appropriate indexes on columns used in WHERE, JOIN, and ORDER BY clauses
2.
**Avoid SELECT *** - specify
only needed columns
3.
Use EXISTS instead of IN for subqueries when possible
4.
Use UNION ALL instead of
UNION when duplicates don’t matter
5.
Limit result sets using TOP, LIMIT, or WHERE conditions
6.
Use window functions instead of self-joins when possible
7.
Analyze execution plans to identify bottlenecks
8.
Consider partitioning for very large tables
9.
Use appropriate data types - don’t over-allocate
10. Update statistics regularly for optimal query plans
No comments:
Post a Comment