Python Fundamentals

Variables and Data Types

Python is dynamically typed — you do not declare the type, the interpreter figures it out. Variables are just names that point to objects in memory.

# Integers and floats
age = 25
temperature = 36.6

# Strings
name = "Emad"
greeting = f"Hello, {name}!"   # f-string interpolation

# Booleans
is_active = True
is_empty = False

# None — represents "no value"
result = None

# Check types at runtime
print(type(age))          #> <class 'int'>
print(type(temperature))  #> <class 'float'>
print(type(name))         #> <class 'str'>
print(type(is_active))    #> <class 'bool'>

Type conversions

# String to int / float
x = int("42")        # 42
y = float("3.14")    # 3.14

# Number to string
s = str(100)          # "100"

# Truthiness — these are all falsy in Python:
# False, 0, 0.0, "", [], {}, set(), None
print(bool(0))    #> False
print(bool([]))   #> False
print(bool("hi")) #> True

Key rule: everything in Python is an object. Even int and bool are objects with methods. 42 is an instance of int.

Lists

A list is an ordered, mutable sequence. It is the most used data structure in Python. Lists can hold mixed types, but in practice you usually keep them homogeneous.

Creating lists

# Literal
numbers = [1, 2, 3, 4, 5]
names   = ["Alice", "Bob", "Charlie"]
mixed   = [1, "hello", True, 3.14]  # valid but uncommon

# Empty list
empty = []

# From a range
digits = list(range(10))    # [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]

Accessing elements

fruits = ["apple", "banana", "cherry", "date", "elderberry"]

# Indexing — zero-based
print(fruits[0])     #> apple
print(fruits[2])     #> cherry
print(fruits[-1])    #> elderberry  (last item)
print(fruits[-2])    #> date        (second to last)

# Slicing — [start:stop:step]  (stop is exclusive)
print(fruits[1:3])   #> ['banana', 'cherry']
print(fruits[:3])    #> ['apple', 'banana', 'cherry']   (first 3)
print(fruits[2:])    #> ['cherry', 'date', 'elderberry'] (from index 2 onward)
print(fruits[::2])   #> ['apple', 'cherry', 'elderberry'] (every other)
print(fruits[::-1])  #> ['elderberry', 'date', 'cherry', 'banana', 'apple'] (reversed)

Modifying lists

nums = [10, 20, 30]

# Append — add a single item to the end
nums.append(40)           # [10, 20, 30, 40]

# Insert — add at a specific index (later items shift right)
nums.insert(1, 15)        # [10, 15, 20, 30, 40]

# Extend — add every item of another iterable to the end
nums.extend([50, 60])     # [10, 15, 20, 30, 40, 50, 60]

# Remove — by value (first occurrence); raises ValueError if the value is absent
nums.remove(15)           # [10, 20, 30, 40, 50, 60]

# Pop — remove by index (default: the last item) and return the removed item
last = nums.pop()         # last = 60, nums = [10, 20, 30, 40, 50]
second = nums.pop(1)      # second = 20, nums = [10, 30, 40, 50]

# Delete by index (nothing is returned)
del nums[0]               # [30, 40, 50]

# Sort in place — sort() changes the list itself and returns None
scores = [88, 45, 92, 67, 100]
scores.sort()             # [45, 67, 88, 92, 100]
scores.sort(reverse=True) # [100, 92, 88, 67, 45]

# Sorted — returns a new list, original unchanged
original = [3, 1, 4, 1, 5]
ordered  = sorted(original)  # [1, 1, 3, 4, 5]

Useful list operations

nums = [4, 2, 7, 2, 9, 1]

print(len(nums))       #> 6
print(min(nums))       #> 1
print(max(nums))       #> 9
print(sum(nums))       #> 25
print(nums.count(2))   #> 2        (how many 2s)
print(nums.index(7))   #> 2        (first index of 7)
print(7 in nums)       #> True     (membership check)
print(99 in nums)      #> False

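Lists are mutable. When you assign b = a, both names point to the same list, so a change made through one name is visible through the other. Use b = a.copy() or b = a[:] to make an independent copy. This copy is shallow: nested lists inside it are still shared, so use copy.deepcopy(a) when you need those copied too.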

Tuples and Dictionaries

Tuples — immutable sequences

A tuple is like a list that cannot be changed after creation. Use tuples for fixed collections like coordinates, RGB values, or database rows.

# Creating tuples
point = (3, 7)
rgb   = (255, 128, 0)
single = (42,)        # note the trailing comma — without it, (42) is just an int

# Accessing — same as lists
print(point[0])       #> 3
print(rgb[-1])        #> 0

# Unpacking — assign multiple variables at once
# (the number of names on the left must match the length of the tuple)
x, y = point
r, g, b = rgb
print(x, y)           #> 3 7

# Tuples are hashable as long as every element is hashable,
# so tuples like these can be used as dictionary keys
locations = {(40.7, -74.0): "New York", (35.7, 139.7): "Tokyo"}

Dictionaries — key-value pairs

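A dictionary is a mutable mapping from keys to values. Since Python 3.7 it is guaranteed to preserve insertion order (in older versions dictionaries were unordered). Keys must be hashable (for example strings, numbers, or tuples that contain only hashable items).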

# Creating dicts
student = {"name": "Emad", "age": 25, "track": "AI/ML"}

# Accessing — [] raises KeyError for a missing key, .get() returns a default instead
print(student["name"])          #> Emad
print(student.get("gpa", 0.0)) #> 0.0  (default if key missing)

# Modifying — assignment updates the key if it exists, otherwise adds it
student["age"] = 26             # update existing key
student["university"] = "JU"   # add new key

# Removing
del student["track"]           # raises KeyError if the key is missing
gpa = student.pop("gpa", None) # remove and return, None if missing

# Iterating — looping over a dict directly yields its keys
for key in student:
    print(key, student[key])

# .items() yields (key, value) pairs, so no second lookup is needed
for key, value in student.items():
    print(f"{key}: {value}")

# Useful methods — keys() and values() return view objects, not lists
print(student.keys())    #> dict_keys(['name', 'age', 'university'])
print(student.values())  #> dict_values(['Emad', 26, 'JU'])
print(len(student))      #> 3
print("name" in student) #> True

dict.get(key, default) is safer than dict[key] — it returns the default instead of raising KeyError when the key doesn't exist.

Loops

Python has two loop types: for (iterate over the items of any iterable, such as a list, string, range, or dict) and while (repeat as long as a condition is true).

for loops

# Loop over a list
colors = ["red", "green", "blue"]
for color in colors:
    print(color)

# Loop over a range of numbers
for i in range(5):       # 0, 1, 2, 3, 4
    print(i)

for i in range(2, 8):    # 2, 3, 4, 5, 6, 7
    print(i)

for i in range(0, 10, 3): # 0, 3, 6, 9
    print(i)

enumerate — index + value

When you need both the index and the value, use enumerate instead of manually tracking an index variable.

names = ["Alice", "Bob", "Charlie"]

# Bad — manual index
i = 0
for name in names:
    print(i, name)
    i += 1

# Good — enumerate
for i, name in enumerate(names):
    print(i, name)

# Start from a different index
for i, name in enumerate(names, start=1):
    print(f"{i}. {name}")
#> 1. Alice
#> 2. Bob
#> 3. Charlie

zip — loop over multiple lists together

names  = ["Alice", "Bob", "Charlie"]
scores = [88, 92, 75]

# zip pairs up items by position and stops when the shortest input runs out
for name, score in zip(names, scores):
    print(f"{name}: {score}")
#> Alice: 88
#> Bob: 92
#> Charlie: 75

while loops

# Count down
n = 5
while n > 0:
    print(n)
    n -= 1

# Loop until a condition is met
import random
attempts = 0
while True:
    roll = random.randint(1, 6)
    attempts += 1
    if roll == 6:
        print(f"Rolled a 6 after {attempts} attempts")
        break   # exit the loop

break, continue, else

nums = [2, 4, 6, 7, 8, 10]

# break — stop the loop early
for n in nums:
    if n % 2 != 0:
        print(f"Found odd number: {n}")
        break

# continue — skip to the next iteration
for n in nums:
    if n % 2 != 0:
        continue    # skip odd numbers
    print(n)        # only prints even numbers

# else — runs if the loop finished without a break
for n in [2, 4, 6, 8]:
    if n % 2 != 0:
        print("Found an odd number")
        break
else:
    print("All numbers were even")  # this runs

Looping over dictionaries

student = {"name": "Emad", "age": 25, "track": "AI/ML"}

# Keys only
for key in student:
    print(key)

# Keys and values
for key, value in student.items():
    print(f"{key} = {value}")

# Values only
for value in student.values():
    print(value)

Never add or remove items from a list while looping over it — the loop can skip elements or visit them twice. If you need to filter, build a new list instead, or loop over a copy, e.g. for item in my_list[:]: ...

Comprehensions

Comprehensions are concise one-liners for building lists, dicts, and sets from loops. They are idiomatic Python — you will see them everywhere in ML code.

List comprehensions

# Basic: [expression for item in iterable]
squares = [x ** 2 for x in range(10)]
# [0, 1, 4, 9, 16, 25, 36, 49, 64, 81]

# With a condition: [expression for item in iterable if condition]
evens = [x for x in range(20) if x % 2 == 0]
# [0, 2, 4, 6, 8, 10, 12, 14, 16, 18]

# Transform + filter
upper_long = [name.upper() for name in ["al", "bob", "charlie"] if len(name) > 2]
# ['BOB', 'CHARLIE']

# Nested loop — flatten a 2D list
matrix = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]
flat = [num for row in matrix for num in row]
# [1, 2, 3, 4, 5, 6, 7, 8, 9]

Dictionary comprehensions

# {key_expr: value_expr for item in iterable}
names = ["alice", "bob", "charlie"]
name_lengths = {name: len(name) for name in names}
# {'alice': 5, 'bob': 3, 'charlie': 7}

# Invert a dictionary — the values become keys, so they must be hashable;
# if two keys share the same value, the later one silently overwrites the earlier
original = {"a": 1, "b": 2, "c": 3}
inverted = {v: k for k, v in original.items()}
# {1: 'a', 2: 'b', 3: 'c'}

Set comprehensions

# {expression for item in iterable}
words = ["hello", "world", "hello", "python"]
unique_lengths = {len(w) for w in words}
# {5, 6}

Readability rule: if a comprehension needs more than one if or more than two for clauses, rewrite it as a regular loop. Clever one-liners that take 30 seconds to parse are not Pythonic.

Functions

Functions bundle reusable logic. In Python they are first-class objects — you can assign them to variables, pass them as arguments, and return them from other functions.

# Basic function
def greet(name):
    """Build the greeting "Hello, <name>!" for the given name."""
    message = f"Hello, {name}!"
    return message

print(greet("Emad"))  #> Hello, Emad!

# Default arguments
def power(base, exp=2):
    """Raise base to the power exp; exp defaults to 2, i.e. squaring."""
    result = pow(base, exp)
    return result

print(power(3))       #> 9
print(power(3, 3))    #> 27

# Multiple return values (returns a tuple)
def min_max(nums):
    """Return the smallest and largest items of nums as a (min, max) tuple."""
    smallest = min(nums)
    largest = max(nums)
    return smallest, largest

lo, hi = min_max([4, 1, 7, 2])
print(lo, hi)  #> 1 7

# *args — variable number of positional arguments
def total(*args):
    """Add up all positional arguments; with no arguments the result is 0."""
    grand_total = sum(args)
    return grand_total

print(total(1, 2, 3, 4))  #> 10

# **kwargs — variable number of keyword arguments
def build_profile(**kwargs):
    """Collect the given keyword arguments into a dict and return it."""
    profile = kwargs
    return profile

print(build_profile(name="Emad", role="ML Engineer"))
#> {'name': 'Emad', 'role': 'ML Engineer'}

Lambda functions

Small anonymous functions for quick one-off operations. Common with sorted, map, and filter.

# Lambda syntax: lambda arguments: expression
# (bound to a name here only to show the syntax — PEP 8 recommends a regular
# def when a function needs a name; lambdas are meant to be passed inline)
double = lambda x: x * 2
print(double(5))  #> 10

# Sort a list of tuples by the second element
pairs = [(1, "b"), (3, "a"), (2, "c")]
pairs.sort(key=lambda p: p[1])
print(pairs)  #> [(3, 'a'), (1, 'b'), (2, 'c')]

# Filter and map — both return lazy iterators, so list() is used to collect the results
nums = [1, 2, 3, 4, 5, 6, 7, 8]
evens   = list(filter(lambda x: x % 2 == 0, nums))  # [2, 4, 6, 8]
doubled = list(map(lambda x: x * 2, nums))           # [2, 4, 6, 8, 10, 12, 14, 16]

Prefer comprehensions over map/filter in most cases. [x * 2 for x in nums] is clearer than list(map(lambda x: x * 2, nums)).

Common Patterns in ML Code

Patterns you will see constantly in data science and machine learning Python code.

Processing a dataset row by row

dataset = [
    {"text": "Great product!", "label": "positive"},
    {"text": "Terrible service", "label": "negative"},
    {"text": "It was okay", "label": "neutral"},
]

# Extract all texts
texts  = [row["text"] for row in dataset]
labels = [row["label"] for row in dataset]

# Filter by label
positives = [row for row in dataset if row["label"] == "positive"]

Building a vocabulary from text

corpus = ["the cat sat", "the cat ate", "the dog sat"]

# Unique words across all sentences
vocab = sorted(set(word for sentence in corpus for word in sentence.split()))
# ['ate', 'cat', 'dog', 'sat', 'the']

# Word to index mapping
word2idx = {word: idx for idx, word in enumerate(vocab)}
# {'ate': 0, 'cat': 1, 'dog': 2, 'sat': 3, 'the': 4}

Counting occurrences

from collections import Counter

words = "the cat sat on the mat the cat".split()
counts = Counter(words)
print(counts)
#> Counter({'the': 3, 'cat': 2, 'sat': 1, 'on': 1, 'mat': 1})

print(counts.most_common(2))
#> [('the', 3), ('cat', 2)]

Batching a list

def batch(items, size):
    """Yield consecutive slices of items, each at most size elements long.

    Args:
        items: A sequence that supports len() and slicing (list, tuple, str, ...).
        size: Maximum number of elements per batch; must be at least 1.

    Yields:
        Slices of items, in order. The last one may be shorter than size.

    Raises:
        ValueError: If size is less than 1. Because this is a generator, the
            error surfaces when iteration starts, not when batch() is called.
    """
    # Without this check a negative size would silently produce no batches,
    # and a size of 0 would fail with an unrelated message from range().
    if size < 1:
        raise ValueError(f"size must be at least 1, got {size!r}")
    for start in range(0, len(items), size):
        yield items[start : start + size]

data = list(range(10))
for b in batch(data, 3):
    print(b)
#> [0, 1, 2]
#> [3, 4, 5]
#> [6, 7, 8]
#> [9]

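Generators (functions that use yield) produce items one at a time instead of building the full result in memory. This is critical for large datasets that don't fit in RAM. Note that batch above still expects items to be an in-memory sequence (it uses len and slicing); what the generator saves is holding all the batches at once.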