Creating a Loss function

One small change as been done in our Value object. Please note that

The radd() functionality has been added.

In [1]:

Copied!





from graphviz import Digraph

def trace(root):
    #Builds a set of all nodes and edges in a graph
    nodes, edges = set(), set()
    def build(v):
        if v not in nodes:
            nodes.add(v)
            for child in v._prev:
                edges.add((child, v))
                build(child)
    build(root)
    return nodes, edges

def draw_dot(root):
    dot = Digraph(format='svg', graph_attr={'rankdir': 'LR'}) #LR == Left to Right

    nodes, edges = trace(root)
    for n in nodes:
        uid = str(id(n))
        #For any value in the graph, create a rectangular ('record') node for it
        dot.node(name = uid, label = "{ %s | data %.4f | grad %.4f }" % ( n.label, n.data, n.grad), shape='record')
        if n._op:
            #If this value is a result of some operation, then create an op node for it
            dot.node(name = uid + n._op, label=n._op)
            #and connect this node to it
            dot.edge(uid + n._op, uid)

    for n1, n2 in edges:
        #Connect n1 to the node of n2
        dot.edge(str(id(n1)), str(id(n2)) + n2._op)

    return dot
from graphviz import Digraph

def trace(root):
    #Builds a set of all nodes and edges in a graph
    nodes, edges = set(), set()
    def build(v):
        if v not in nodes:
            nodes.add(v)
            for child in v._prev:
                edges.add((child, v))
                build(child)
    build(root)
    return nodes, edges

def draw_dot(root):
    dot = Digraph(format='svg', graph_attr={'rankdir': 'LR'}) #LR == Left to Right

    nodes, edges = trace(root)
    for n in nodes:
        uid = str(id(n))
        #For any value in the graph, create a rectangular ('record') node for it
        dot.node(name = uid, label = "{ %s | data %.4f | grad %.4f }" % ( n.label, n.data, n.grad), shape='record')
        if n._op:
            #If this value is a result of some operation, then create an op node for it
            dot.node(name = uid + n._op, label=n._op)
            #and connect this node to it
            dot.edge(uid + n._op, uid)

    for n1, n2 in edges:
        #Connect n1 to the node of n2
        dot.edge(str(id(n1)), str(id(n2)) + n2._op)

    return dot

In [2]:

Copied!

import math
import math

In [20]:

Copied!





class Value:

    def __init__(self, data, _children=(), _op='', label=''):
        self.data = data
        self.grad = 0.0
        self._backward = lambda: None #Its an empty function by default. This is what will do that gradient calculation at each of the operations.
        self._prev = set(_children)
        self._op = _op
        self.label = label


    def __repr__(self):
        return f"Value(data={self.data})"

    def __add__(self, other):
        other = other if isinstance(other, Value) else Value(other)
        out = Value(self.data + other.data, (self, other), '+')

        def backward():
          self.grad += 1.0 * out.grad
          other.grad += 1.0 * out.grad

        out._backward = backward
        return out

    def __radd__(self, other): #here
        return self + other

    def __mul__(self, other):
        other = other if isinstance(other, Value) else Value(other)
        out = Value(self.data * other.data, (self, other), '*')

        def backward():
          self.grad += other.data * out.grad
          other.grad += self.data * out.grad
        out._backward = backward
        return out

    def __rmul__(self, other):   #other * self
        return self * other

    def __truediv__(self, other):  #self/other
        return self * other**-1

    def __neg__(self):
        return self * -1

    def __sub__(self, other):  #self - other
        return self + (-other)

    def __pow__(self, other):
        assert isinstance(other, (int, float)), "only supporting int/float powers for now"
        out = Value(self.data ** other, (self, ), f"**{other}")

        def backward():
          self.grad += (other * (self.data ** (other - 1))) * out.grad

        out._backward = backward
        return out

    def tanh(self):
        x = self.data
        t = (math.exp(2*x) - 1)/(math.exp(2*x) + 1)
        out = Value(t, (self, ), 'tanh')

        def backward():
          self.grad += 1 - (t**2) * out.grad

        out._backward = backward
        return out

    def exp(self):
        x = self.data
        out = Value(math.exp(x), (self, ), 'exp')   #We merged t and out, into just out

        def backward():
            self.grad += out.data * out.grad

        out._backward = backward
        return out

    def backward(self):

      topo = []
      visited = set()
      def build_topo(v):
        if v not in visited:
          visited.add(v)
          for child in v._prev:
            build_topo(child)
          topo.append(v)

      build_topo(self)

      self.grad = 1.0
      for node in reversed(topo):
        node._backward()
class Value:

    def __init__(self, data, _children=(), _op='', label=''):
        self.data = data
        self.grad = 0.0
        self._backward = lambda: None #Its an empty function by default. This is what will do that gradient calculation at each of the operations.
        self._prev = set(_children)
        self._op = _op
        self.label = label


    def __repr__(self):
        return f"Value(data={self.data})"

    def __add__(self, other):
        other = other if isinstance(other, Value) else Value(other)
        out = Value(self.data + other.data, (self, other), '+')

        def backward():
          self.grad += 1.0 * out.grad
          other.grad += 1.0 * out.grad

        out._backward = backward
        return out

    def __radd__(self, other): #here
        return self + other

    def __mul__(self, other):
        other = other if isinstance(other, Value) else Value(other)
        out = Value(self.data * other.data, (self, other), '*')

        def backward():
          self.grad += other.data * out.grad
          other.grad += self.data * out.grad
        out._backward = backward
        return out

    def __rmul__(self, other):   #other * self
        return self * other

    def __truediv__(self, other):  #self/other
        return self * other**-1

    def __neg__(self):
        return self * -1

    def __sub__(self, other):  #self - other
        return self + (-other)

    def __pow__(self, other):
        assert isinstance(other, (int, float)), "only supporting int/float powers for now"
        out = Value(self.data ** other, (self, ), f"**{other}")

        def backward():
          self.grad += (other * (self.data ** (other - 1))) * out.grad

        out._backward = backward
        return out

    def tanh(self):
        x = self.data
        t = (math.exp(2*x) - 1)/(math.exp(2*x) + 1)
        out = Value(t, (self, ), 'tanh')

        def backward():
          self.grad += 1 - (t**2) * out.grad

        out._backward = backward
        return out

    def exp(self):
        x = self.data
        out = Value(math.exp(x), (self, ), 'exp')   #We merged t and out, into just out

        def backward():
            self.grad += out.data * out.grad

        out._backward = backward
        return out

    def backward(self):

      topo = []
      visited = set()
      def build_topo(v):
        if v not in visited:
          visited.add(v)
          for child in v._prev:
            build_topo(child)
          topo.append(v)

      build_topo(self)

      self.grad = 1.0
      for node in reversed(topo):
        node._backward()

In [4]:

Copied!

import random
import random

In [21]:

Copied!





class Neuron:
	def __init__(self, nin):
		self.w = [ Value(random.uniform(-1,1)) for _ in range(nin) ]
		self.b = Value(random.uniform(-1,1))

	def __call__(self, x):
		# (w*x)+b
		act = sum( (wi*xi for wi,xi in zip(self.w, x)), self.b )
		out = act.tanh()
		return out

class Layer:
	def __init__(self, nin, nout):
		self.neurons = [Neuron(nin) for _ in range(nout)]

	def __call__(self, x):
		outs = [n(x) for n in self.neurons]
		return outs[0] if len(outs)==1 else outs  #The New added line for making the output better

class MLP:
	def __init__(self, nin, nouts):
		sz = [nin] + nouts
		self.layers = [ Layer(sz[i], sz[i+1]) for i in range(len(nouts)) ]

	def __call__(self, x):
		for layer in self.layers:
			x = layer(x)
		return x
class Neuron:
	def __init__(self, nin):
		self.w = [ Value(random.uniform(-1,1)) for _ in range(nin) ]
		self.b = Value(random.uniform(-1,1))

	def __call__(self, x):
		# (w*x)+b
		act = sum( (wi*xi for wi,xi in zip(self.w, x)), self.b )
		out = act.tanh()
		return out

class Layer:
	def __init__(self, nin, nout):
		self.neurons = [Neuron(nin) for _ in range(nout)]

	def __call__(self, x):
		outs = [n(x) for n in self.neurons]
		return outs[0] if len(outs)==1 else outs  #The New added line for making the output better

class MLP:
	def __init__(self, nin, nouts):
		sz = [nin] + nouts
		self.layers = [ Layer(sz[i], sz[i+1]) for i in range(len(nouts)) ]

	def __call__(self, x):
		for layer in self.layers:
			x = layer(x)
		return x

In [23]:

Copied!

x = [2.0, 3.0, -1.0]
n = MLP(3, [4, 4, 1])
n(x)
x = [2.0, 3.0, -1.0]
n = MLP(3, [4, 4, 1])
n(x)

Out[23]:

Value(data=-0.33393070997191954)

Now, we are creating a very simple dataset. Where we feed a list of values which we would like the NN to take as input.

Along with the list of desired output targets.
So whenever we give the values xs into the NN, we want the output values to be those in ys respectively. \

It's almost like a simple Binary Classification. It needs to be either 1.0 or -1.0 in our example.

In [24]:

Copied!





xs = [
    [2.0, 3.0, -1.0],
    [3.0, -1.0, 0.5],
    [0.5, 1.0, 1.0],
    [1.0, 1.0, -1.0]
]

ys = [1.0, -1.0, -1.0, 1.0] #desired targets
xs = [
    [2.0, 3.0, -1.0],
    [3.0, -1.0, 0.5],
    [0.5, 1.0, 1.0],
    [1.0, 1.0, -1.0]
]

ys = [1.0, -1.0, -1.0, 1.0] #desired targets

Now, ys is what WE WANT.

Lets see what our NN thinks of those inputs

In [25]:

Copied!

ypred = [n(x) for x in xs]
ypred
ypred = [n(x) for x in xs]
ypred

Out[25]:

[Value(data=-0.33393070997191954),
 Value(data=-0.7996605801165794),
 Value(data=-0.053910703703307694),
 Value(data=-0.5691658715750736)]

So we can see how the outputs we have received are different from what we need

first and fourth one needs to be increased slighly to reach our desired value.

And the second, third one needs to be decreased.

So now how do train/tune the NN or how do tune the weights in order to get our desired output.

The trick in NN is to calculate a single value that contains the sum of the total performance of your NN. And that will be the loss value.

So this loss, will give us an intuition on how well the NN is performing.

Right now in our example, it is not performing very well, as the values are way off. Therefore the loss will be high and we need to look to minimize the loss.

So in this particular situation/example, we are going to do/calculate the Mean Squared Error Loss.

So first we take the 'y ground truth (ygt)' and then 'y output (yout)'. We will be pairing them together in the loop

In [ ]:

Copied!

for ygt, yout in zip(ys, ypred)
for ygt, yout in zip(ys, ypred)

And then we will be subtracting each of those values and then squaring them

Now that will give us the loss for each of those individual values

In [11]:

Copied!

[(yout - ygt)**2 for ygt, yout in zip(ys, ypred)]
[(yout - ygt)**2 for ygt, yout in zip(ys, ypred)]

Out[11]:

[Value(data=0.2551293266642757),
 Value(data=1.5689343597801393),
 Value(data=1.942823557477381),
 Value(data=0.2264329556998548)]

So depending on how off your pred value is from the expected one, the higher th value will be

That expression is written in such a way that, only when the yout and ygt are close to each other, it will become 0. Therefore no loss in that case

So the aim is to make the loss as small as possible

Now, as mentioned before. The final loss will just be the sum of all those numbers.

In [26]:

Copied!

loss = sum((yout - ygt)**2 for ygt, yout in zip(ys, ypred))
loss
loss = sum((yout - ygt)**2 for ygt, yout in zip(ys, ypred))
loss

Out[26]:

Value(data=5.176873511248545)

Note: There was an error here where int could not be added to a Value. So I have made the modification in the Value object to handle that :)

In [27]:

Copied!

loss.backward()
loss.backward()

Something cool really happended when you ran that last cell

In [28]:

Copied!

n.layers[0].neurons[0].w[0]
n.layers[0].neurons[0].w[0]

Out[28]:

Value(data=-0.22585628440403194)

In [29]:

Copied!

n.layers[0].neurons[0].w[0].grad
n.layers[0].neurons[0].w[0].grad

Out[29]:

3.080379043409595

So now we have extracted the value and its grad value for one particular neuron!

Lets look at the graph of the loss. That will give us a bigger graph, as if you would notice in the mean squared error expression that we wrote, we are passing each of the neuron to it.

In [30]:

Copied!

draw_dot(loss)
draw_dot(loss)

Out[30]:

No description has been provided for this image

PHEEWWWW THAT WAS AWESOME LMAO

We have like 4 different foward pass and finally calculating the loss

Not only that, we have also backpropagated throughout the entire graph!