Collecting all parameters in the Neural Net
In [1]:
from graphviz import Digraph

def trace(root):
    # Builds a set of all nodes and edges in a graph
    nodes, edges = set(), set()
    def build(v):
        if v not in nodes:
            nodes.add(v)
            for child in v._prev:
                edges.add((child, v))
                build(child)
    build(root)
    return nodes, edges

def draw_dot(root):
    dot = Digraph(format='svg', graph_attr={'rankdir': 'LR'}) # LR == Left to Right
    nodes, edges = trace(root)
    for n in nodes:
        uid = str(id(n))
        # For any value in the graph, create a rectangular ('record') node for it
        dot.node(name=uid, label="{ %s | data %.4f | grad %.4f }" % (n.label, n.data, n.grad), shape='record')
        if n._op:
            # If this value is a result of some operation, then create an op node for it
            dot.node(name=uid + n._op, label=n._op)
            # and connect this node to it
            dot.edge(uid + n._op, uid)
    for n1, n2 in edges:
        # Connect n1 to the op node of n2
        dot.edge(str(id(n1)), str(id(n2)) + n2._op)
    return dot
In [2]:
import math
In [3]:
class Value:
    def __init__(self, data, _children=(), _op='', label=''):
        self.data = data
        self.grad = 0.0
        self._backward = lambda: None # It's an empty function by default. This is what will do the gradient calculation at each of the operations.
        self._prev = set(_children)
        self._op = _op
        self.label = label

    def __repr__(self):
        return f"Value(data={self.data})"

    def __add__(self, other):
        other = other if isinstance(other, Value) else Value(other)
        out = Value(self.data + other.data, (self, other), '+')
        def backward():
            self.grad += 1.0 * out.grad
            other.grad += 1.0 * out.grad
        out._backward = backward
        return out

    def __radd__(self, other): # other + self
        return self + other

    def __mul__(self, other):
        other = other if isinstance(other, Value) else Value(other)
        out = Value(self.data * other.data, (self, other), '*')
        def backward():
            self.grad += other.data * out.grad
            other.grad += self.data * out.grad
        out._backward = backward
        return out

    def __rmul__(self, other): # other * self
        return self * other

    def __truediv__(self, other): # self / other
        return self * other**-1

    def __neg__(self):
        return self * -1

    def __sub__(self, other): # self - other
        return self + (-other)

    def __pow__(self, other):
        assert isinstance(other, (int, float)), "only supporting int/float powers for now"
        out = Value(self.data ** other, (self, ), f"**{other}")
        def backward():
            self.grad += (other * (self.data ** (other - 1))) * out.grad
        out._backward = backward
        return out

    def tanh(self):
        x = self.data
        t = (math.exp(2*x) - 1) / (math.exp(2*x) + 1)
        out = Value(t, (self, ), 'tanh')
        def backward():
            self.grad += (1 - t**2) * out.grad # d/dx tanh(x) = 1 - tanh(x)**2
        out._backward = backward
        return out

    def exp(self):
        x = self.data
        out = Value(math.exp(x), (self, ), 'exp') # We merged t and out into just out
        def backward():
            self.grad += out.data * out.grad
        out._backward = backward
        return out

    def backward(self):
        topo = []
        visited = set()
        def build_topo(v):
            if v not in visited:
                visited.add(v)
                for child in v._prev:
                    build_topo(child)
                topo.append(v)
        build_topo(self)
        self.grad = 1.0
        for node in reversed(topo):
            node._backward()
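As a quick sanity check that the engine above works end to end (a minimal sketch, not part of the original notebook; the names a, b and c are purely illustrative):

a = Value(2.0, label='a')
b = Value(-3.0, label='b')
c = a * b + a; c.label = 'c'   # builds the expression graph for c = a*b + a
c.backward()                   # seeds c.grad = 1.0 and backpropagates through every node
print(c.data)                  # -4.0
print(a.grad, b.grad)          # dc/da = b + 1 = -2.0, dc/db = a = 2.0
# draw_dot(c) would render this small graph with the data and grad of each node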
In [4]:
import random
In [ ]:
class Neuron:
    def __init__(self, nin):
        self.w = [Value(random.uniform(-1, 1)) for _ in range(nin)]
        self.b = Value(random.uniform(-1, 1))

    def __call__(self, x):
        # (w * x) + b
        act = sum((wi * xi for wi, xi in zip(self.w, x)), self.b)
        out = act.tanh()
        return out

class Layer:
    def __init__(self, nin, nout):
        self.neurons = [Neuron(nin) for _ in range(nout)]

    def __call__(self, x):
        outs = [n(x) for n in self.neurons]
        return outs[0] if len(outs) == 1 else outs # The newly added line, for making the output nicer

class MLP:
    def __init__(self, nin, nouts):
        sz = [nin] + nouts
        self.layers = [Layer(sz[i], sz[i+1]) for i in range(len(nouts))]

    def __call__(self, x):
        for layer in self.layers:
            x = layer(x)
        return x
In [ ]:
x = [2.0, 3.0, -1.0]
n = MLP(3, [4, 4, 1])
n(x)
Out[ ]:
Value(data=-0.33393070997191954)
Now we'll add a parameters() method at each level so we can collect every parameter of the MLP: first on Neuron, then on Layer, and finally on MLP.
In [5]:
class Neuron:
    def __init__(self, nin):
        self.w = [Value(random.uniform(-1, 1)) for _ in range(nin)]
        self.b = Value(random.uniform(-1, 1))

    def __call__(self, x):
        act = sum((wi * xi for wi, xi in zip(self.w, x)), self.b)
        out = act.tanh()
        return out

    def parameters(self):
        return self.w + [self.b]

class Layer:
    def __init__(self, nin, nout):
        self.neurons = [Neuron(nin) for _ in range(nout)]

    def __call__(self, x):
        outs = [n(x) for n in self.neurons]
        return outs[0] if len(outs) == 1 else outs

    def parameters(self):
        return [p for n in self.neurons for p in n.parameters()]
        # Alternative way of writing the above return statement:
        # parameters = []
        # for n in self.neurons:
        #     p = n.parameters()
        #     parameters.extend(p)
        # return parameters

class MLP:
    def __init__(self, nin, nouts):
        sz = [nin] + nouts
        self.layers = [Layer(sz[i], sz[i + 1]) for i in range(len(nouts))]

    def __call__(self, x):
        for layer in self.layers:
            x = layer(x)
        return x

    def parameters(self):
        return [p for layer in self.layers for p in layer.parameters()]
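Just to make the Neuron -> Layer -> MLP chain concrete, here is a small illustrative check of how the parameter lists nest (the names tiny and neuron are made up for this sketch, not part of the original notebook):

tiny = MLP(2, [1])                        # a single layer with one neuron taking two inputs
neuron = tiny.layers[0].neurons[0]
print(len(neuron.parameters()))           # 3: two weights plus one bias
print(len(tiny.layers[0].parameters()))   # 3: the Layer concatenates its neurons' parameter lists
print(len(tiny.parameters()))             # 3: the MLP concatenates its layers' parameter lists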
In [6]:
x = [2.0, 3.0, -1.0]
n = MLP(3, [4, 4, 1])
n(x)
Out[6]:
Value(data=0.7625252102576119)
In [7]:
n.parameters()
Out[7]:
[Value(data=0.31785584973173164), Value(data=0.2998372553774835), Value(data=-0.8029008199517247), Value(data=-0.39340060142531286), Value(data=0.23322412084873956), Value(data=0.29891813550514534), Value(data=-0.5314862907700675), Value(data=0.19661072911432642), Value(data=0.9142418954398666), Value(data=0.041208786424172805), Value(data=-0.23983634992214187), Value(data=-0.593538786941121), Value(data=0.39482399486723296), Value(data=-0.9880306400643504), Value(data=-0.8097855189886964), Value(data=0.4629484174790124), Value(data=0.31168805444961634), Value(data=-0.9828138115624934), Value(data=0.5221437252554255), Value(data=-0.19703997468926882), Value(data=-0.5504279057638468), Value(data=-0.8365261779265616), Value(data=-0.22783861276612227), Value(data=0.5666981389300718), Value(data=-0.06415010714317604), Value(data=0.845414529622897), Value(data=0.4793425135418725), Value(data=-0.38321354069020086), Value(data=-0.10963021731006206), Value(data=0.14485994942129898), Value(data=-0.19028270981146433), Value(data=0.5148204886483112), Value(data=-0.8559156650791364), Value(data=0.3778416962066449), Value(data=0.09608787032156774), Value(data=-0.8288362456839788), Value(data=0.5641592956285757), Value(data=0.13764114112689052), Value(data=-0.19625087652731277), Value(data=-0.6117936229921406), Value(data=0.7546009612155813)]
So these are all the parameters of the network: the weights and the biases of every neuron.
In [8]:
len(n.parameters())
Out[8]:
41
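That count checks out: MLP(3, [4, 4, 1]) has layer sizes 3 -> 4 -> 4 -> 1, and every neuron holds one weight per input plus a bias, so the total is 4*(3+1) + 4*(4+1) + 1*(4+1) = 16 + 20 + 5 = 41. A quick cross-check of the same arithmetic (an illustrative sketch, assuming the same layer sizes):

sz = [3, 4, 4, 1]
sum(nout * (nin + 1) for nin, nout in zip(sz, sz[1:]))   # -> 41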