17 Container Modules
Containers are modules that hold other modules. They make it easy to compose layers into larger architectures while every parameter in every child is still tracked automatically.
17.1 Sequential
Sequential applies its children in order, feeding each module's output into the next:
class Sequential(Module):
    """A sequential container of modules."""

    def __init__(self, *modules):
        super().__init__()
        # Register each child under its position ("0", "1", ...) so the
        # Module machinery (parameters(), printing) can find it.
        for i, module in enumerate(modules):
            self._modules[str(i)] = module

    def forward(self, x):
        for module in self._modules.values():
            x = module(x)
        return x

    def __getitem__(self, idx):
        return list(self._modules.values())[idx]

    def __len__(self):
        return len(self._modules)

Usage:
model = Sequential(
    Linear(4, 16),
    LayerNorm(16),
    ReLU(),
    Dropout(0.2),
    Linear(16, 3)
)
x = Tensor(np.random.randn(32, 4))
y = model(x) # Passes through all layers in order
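Because __getitem__ and __len__ delegate to the underlying _modules dict, a Sequential can also be indexed like a list. A minimal sketch using the model built above:

print(len(model))     # 5
print(model[0])       # the first Linear(4, 16)
print(model[-1])      # the last Linear(16, 3); negative indices work because
                      # __getitem__ indexes into a plain Python list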
Note
Code Reference: See src/tensorweaver/layers/ for container implementations.
17.2 ModuleList
A list of modules (for dynamic architectures):
class ModuleList(Module):
    """A list of modules."""

    def __init__(self, modules=None):
        super().__init__()
        self._list = []
        if modules:
            for module in modules:
                self.append(module)

    def append(self, module):
        # Register in _modules (for parameter tracking) and keep a plain
        # Python list (for fast indexing and iteration).
        idx = len(self._list)
        self._modules[str(idx)] = module
        self._list.append(module)

    def __getitem__(self, idx):
        return self._list[idx]

    def __iter__(self):
        return iter(self._list)

    def __len__(self):
        return len(self._list)
Note
Code Reference: See src/tensorweaver/layers/layer_list.py for the implementation.
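This registration is the whole point of using ModuleList instead of a plain Python list: because append() writes each child into _modules, the container's parameters() sees them all. A quick sanity check (a sketch; it assumes Linear registers a weight and a bias as in the earlier chapter, so two parameter tensors per layer):

heads = ModuleList([Linear(4, 16) for _ in range(3)])
print(len(heads))                     # 3
print(len(list(heads.parameters())))  # 6 -- weight and bias for each Linear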
Usage:
class MultiHeadNetwork(Module):
    def __init__(self, input_size, num_heads):
        super().__init__()
        self.heads = ModuleList([
            Linear(input_size, 16) for _ in range(num_heads)
        ])

    def forward(self, x):
        outputs = [head(x) for head in self.heads]
        return sum(outputs) / len(outputs)

17.3 ModuleDict
A dictionary of modules:
class ModuleDict(Module):
    """A dictionary of modules."""

    def __init__(self, modules=None):
        super().__init__()
        if modules:
            for name, module in modules.items():
                self[name] = module

    def __setitem__(self, name, module):
        self._modules[name] = module

    def __getitem__(self, name):
        return self._modules[name]

    def __contains__(self, name):
        return name in self._modules

    def keys(self):
        return self._modules.keys()

    def values(self):
        return self._modules.values()

    def items(self):
        return self._modules.items()
Note
Code Reference: See src/tensorweaver/layers/layer_dict.py for the implementation.
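Since __setitem__, __getitem__, and __contains__ all delegate to _modules, a ModuleDict supports the familiar dict protocol. A minimal sketch (the layer sizes are arbitrary):

heads = ModuleDict({'cls': Linear(8, 4), 'reg': Linear(8, 1)})
heads['emb'] = Linear(8, 16)   # add a head after construction
print('cls' in heads)          # True
print(list(heads.keys()))      # ['cls', 'reg', 'emb']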
Usage:
class MultiTaskModel(Module):
    def __init__(self, input_size, hidden_size):
        super().__init__()
        self.shared = Linear(input_size, hidden_size)
        self.heads = ModuleDict({
            'classification': Linear(hidden_size, 10),
            'regression': Linear(hidden_size, 1),
            'embedding': Linear(hidden_size, 64)
        })

    def forward(self, x, task):
        x = relu(self.shared(x))
        return self.heads[task](x)

model = MultiTaskModel(100, 64)
x = Tensor(np.random.randn(32, 100))  # batch of 32, input_size = 100
cls_out = model(x, 'classification')
reg_out = model(x, 'regression')

17.4 ReLU as a Module
To drop activations into a Sequential, we wrap the functional versions in small parameter-free modules:
class ReLU(Module):
    """ReLU activation as a module."""

    def forward(self, x):
        return relu(x)  # Use functional version

class GELU(Module):
    def forward(self, x):
        return gelu(x)

class Sigmoid(Module):
    def forward(self, x):
        return sigmoid(x)

17.5 Dropout as a Module
class Dropout(Module):
    def __init__(self, p=0.5):
        super().__init__()
        self.p = p

    def forward(self, x):
        if not self.training or self.p == 0:
            return x
        mask = (np.random.rand(*x.shape) > self.p).astype(np.float32)
        scale = 1.0 / (1 - self.p)
        # Multiply through the autograd graph so gradients flow back to x;
        # wrapping x.data in a brand-new Tensor would detach the result.
        return x * Tensor(mask * scale)
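A small sketch of the training/eval switch. Here the training flag is set explicitly; if your Module base class provides train() / eval() helpers, they toggle the same attribute:

drop = Dropout(p=0.5)
x = Tensor(np.ones((2, 4)))

drop.training = True    # training mode: roughly half the entries are zeroed,
y_train = drop(x)       # survivors are scaled by 1 / (1 - p) = 2.0

drop.training = False   # eval mode: dropout is a no-op, x comes back unchanged
y_eval = drop(x)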
17.6 Building Complex Models
With these containers in hand, complex models become straightforward to assemble:
class TransformerBlock(Module):
    def __init__(self, d_model, num_heads):
        super().__init__()
        self.attention = MultiHeadAttention(d_model, num_heads)
        self.ln1 = LayerNorm(d_model)
        self.ffn = Sequential(
            Linear(d_model, d_model * 4),
            GELU(),
            Linear(d_model * 4, d_model)
        )
        self.ln2 = LayerNorm(d_model)
        self.dropout = Dropout(0.1)

    def forward(self, x):
        # Attention with residual
        x = x + self.dropout(self.attention(self.ln1(x)))
        # FFN with residual
        x = x + self.dropout(self.ffn(self.ln2(x)))
        return x

class Transformer(Module):
    def __init__(self, d_model, num_heads, num_layers):
        super().__init__()
        self.layers = ModuleList([
            TransformerBlock(d_model, num_heads)
            for _ in range(num_layers)
        ])
        self.ln_f = LayerNorm(d_model)

    def forward(self, x):
        for layer in self.layers:
            x = layer(x)
        return self.ln_f(x)

17.7 Refactored Iris MLP
class IrisClassifier(Module):
    def __init__(self):
        super().__init__()
        self.net = Sequential(
            Linear(4, 16),
            LayerNorm(16),
            ReLU(),
            Dropout(0.2),
            Linear(16, 3)
        )

    def forward(self, x):
        return self.net(x)

# Usage
model = IrisClassifier()
print(model)
# IrisClassifier(
#   (net): Sequential(
#     (0): Linear(...)
#     (1): LayerNorm(...)
#     (2): ReLU()
#     (3): Dropout(...)
#     (4): Linear(...)
#   )
# )
optimizer = Adam(model.parameters())

17.8 Summary
- Sequential: Apply modules in order
- ModuleList: Dynamic list of modules
- ModuleDict: Named dictionary of modules
- Containers automatically track all parameters
- Containers enable clean, composable architectures
Next: Saving and loading models with state_dict.