---
title: pySquelch - Frequency Activity Reports via Python
date: 2009-06-18 22:59:01
tags: amateur radio, python, old
---

# pySquelch - Frequency Activity Reports via Python

<p class="has-background has-light-green-cyan-background-color"><strong>Update:</strong> this project is now on GitHub  <a href="https://github.com/FredEckert/pySquelch">https://github.com/FredEckert/pySquelch</a> </p>

__I've been working on the pySquelch project,__ a tool to graph frequency usage with respect to time. The code I'm sharing below listens to the microphone jack on the sound card (hooked up to a radio) and determines when transmissions begin and end. I ran the code below for 24 hours and this is the result:

<div class="text-center img-border">

[![](1png_thumb.jpg)](1png.png)

</div>

__This graph represents frequency activity with respect to time.__ The semi-transparent gray line represents the raw frequency usage: the fraction of each minute the frequency was tied up by transmissions. The solid blue line represents the same data smoothed by 10 minutes (in both directions) using the Gaussian smoothing method, modified slightly from my [linear data smoothing with Python page](http://www.swharden.com/blog/2008-11-17-linear-data-smoothing-in-python/).

<div class="text-center img-border">

[![](2png_thumb.jpg)](2png.png)

</div>

__I used the code below to generate the log, and the code further below to create the graph from the log file.__ Assuming your microphone is enabled and everything else is working, this software will require you to determine your own threshold for talking vs. no talking. Read the code and you'll figure out how to test your sound card settings.
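
For example, a minimal level meter (a sketch using the same alsaaudio settings as the logger below) prints the peak amplitude of each audio chunk. Watch the numbers with the squelch closed and then during a transmission, and pick a threshold between the two:

```python
# levelTest.py - a minimal sketch for picking a squelch threshold
import alsaaudio
import audioop

inp = alsaaudio.PCM(alsaaudio.PCM_CAPTURE, alsaaudio.PCM_NONBLOCK)
inp.setchannels(2)
inp.setrate(1000)
inp.setformat(alsaaudio.PCM_FORMAT_S8)
inp.setperiodsize(100)

while True:
    l, data = inp.read()  # poll the audio device
    if l > 0:
        print audioop.max(data, 1)  # peak amplitude of this chunk
```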

__If you want to try this yourself__ you need a Linux system (a Windows version could be created simply by replacing _getVolEach()_ with a Windows-based audio level detection routine) with Python and the alsaaudio, numpy, and matplotlib libraries. Try running the code on your own, and if a library isn't recognized, use "aptitude search" to find it. Everything you need can be installed from packages in the standard repositories.
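
As a sketch of what that cross-platform replacement for _getVolEach()_ might look like, here is one possibility using the PyAudio library (this is my assumption, not part of pySquelch; the sample rate and buffer size are arbitrary):

```python
# hypothetical getVolEach() replacement using PyAudio (not part of pySquelch)
import pyaudio
import audioop

pa = pyaudio.PyAudio()
stream = pa.open(format=pyaudio.paInt16, channels=1, rate=8000,
                 input=True, frames_per_buffer=100)

def getVolEach(threshold=500):
    # read one chunk and report whether its peak exceeds the threshold
    data = stream.read(100)
    return audioop.max(data, 2) > threshold  # 2 = bytes per 16-bit sample
```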

```python

# pySquelchLogger.py
import time
import alsaaudio
import audioop
inp = alsaaudio.PCM(alsaaudio.PCM_CAPTURE, alsaaudio.PCM_NONBLOCK)
inp.setchannels(2)
inp.setrate(1000)
inp.setformat(alsaaudio.PCM_FORMAT_S8)
inp.setperiodsize(100)
addToLog = ""
lastLogTime = 0

testLevel = False  # SET THIS TO 'True' TO TEST YOUR SOUNDCARD


def getVolEach():
    # this is a quick way to detect activity.
    # modify this function to use alternate methods of detection.
    while True:
        l, data = inp.read()  # poll the audio device
        if l > 0:
            break
    vol = audioop.max(data, 1)  # get the maximum amplitude
    if testLevel:
        print vol
    if vol > 10:
        return True  # SET THIS NUMBER TO SUIT YOUR NEEDS ###
    return False


def getVol():
    # reliably detect activity by getting 3 consistent readings.
    a, b, c = True, False, False
    while True:
        a = getVolEach()
        b = getVolEach()
        c = getVolEach()
        if a == b == c:
            if testLevel:
                print "RESULT:", a
            break
    if a == True:
        time.sleep(1)
    return a


def updateLog():
    # open the log file, append the new data, and save it again.
    global addToLog, lastLogTime
    # print "UPDATING LOG"
    if len(addToLog) > 0:
        f = open('log.txt', 'a')
        f.write(addToLog)
        f.close()
        addToLog = ""
    lastLogTime = time.mktime(time.localtime())


def findSquelch():
    # this will record a single transmission and store its data.
    global addToLog
    while True:  # loop until we hear talking
        time.sleep(.5)
        if getVol() == True:
            start = time.mktime(time.localtime())
            print start,
            break
    while True:  # loop until talking stops
        time.sleep(.1)
        if getVol() == False:
            length = time.mktime(time.localtime())-start
            print length
            break
    newLine = "%d,%d " % (start, length)
    addToLog += newLine
    if start-lastLogTime > 30:
        updateLog()  # update the log


while True:
    findSquelch()
```

__The logging code (above) produces a log file like this (below).__ The values represent the start time of each transmission (in [seconds since epoch](http://en.wikipedia.org/wiki/Unix_time)) followed by the duration of the transmission in seconds.

```
#log.txt
1245300044,5 1245300057,4 1245300063,16 1245300094,13 1245300113,4 1245300120,14 1245300195,4 1245300295,4 1245300348,4 1245300697,7 1245300924,3 1245301157,4 1245301207,12 1245301563,4 1245302104,6 1245302114,6 1245302192,3 1245302349,4 1245302820,4 1245304812,13 1245308364,10 1245308413,14 1245312008,14 1245313953,11 1245314008,6 1245314584,4 1245314641,3 1245315212,5 1245315504,6 1245315604,13 1245315852,3 1245316255,6 1245316480,5 1245316803,3 1245316839,6 1245316848,11 1245316867,5 1245316875,12 1245316893,13 1245316912,59 1245316974,12 1245316988,21 1245317011,17 1245317044,10 1245317060,6 1245317071,7 1245317098,33 1245317140,96 1245317241,15 1245317259,14 1245317277,8 1245317298,18 1245317322,103 1245317435,40 1245317488,18 1245317508,34 1245317560,92 1245317658,29 1245317697,55 1245317755,33 1245317812,5 1245317818,7 1245317841,9 1245317865,25 1245317892,79 1245317972,30 1245318007,8 1245318021,60 1245318083,28 1245318114,23 1245318140,25 1245318167,341 1245318512,154 1245318670,160 1245318834,22 1245318859,9 1245318870,162 1245319042,57 1245319102,19 1245319123,30 1245319154,18 1245319206,5 1245319214,13 1245319229,6 1245319238,6 1245319331,9 1245319341,50 1245319397,71 1245319470,25 1245319497,40 1245319540,8 1245319551,77 1245319629,4 1245319638,36 1245319677,158 1245319837,25 1245319865,40 1245319907,33 1245319948,92 1245320043,26 1245320100,9 1245320111,34 1245320146,8 1245320159,6 1245320167,8 1245320181,12 1245320195,15 1245320212,14 1245320238,18 1245320263,46 1245320310,9 1245320326,22 1245320352,27 1245320381,15 1245320398,24 1245320425,57 1245320483,16 1245320501,40 1245320543,43 1245320589,65 1245320657,63 1245320722,129 1245320853,33 1245320889,50 1245320940,1485 1245322801,7 1245322809,103 1245322923,5 1245322929,66 1245323553,4 1245324203,15 1245324383,5 1245324570,7 1245324835,4 1245325200,8 1245325463,5 1245326414,12 1245327340,12 1245327836,4 1245327973,4 1245330006,12 1245331244,11 1245331938,11 1245332180,5 1245332187,81 1245332573,5 1245333609,12 1245334447,10 1245334924,9 1245334945,4 1245334971,4 1245335031,9 1245335076,11 1245335948,16 1245335965,27 1245335993,113 1245336107,79 1245336187,64 1245336253,37 1245336431,4 1245336588,5 1245336759,7 1245337048,3 1245337206,13 1245337228,4 1245337309,4 1245337486,6 1245337536,8 1245337565,38 1245337608,100 1245337713,25 1245337755,169 1245337930,8 1245337941,20 1245337967,6 1245337978,7 1245337996,20 1245338019,38 1245338060,127 1245338192,30 1245338227,22 1245338250,15 1245338272,15 1245338310,3 1245338508,4 1245338990,5 1245339136,5 1245339489,8 1245339765,4 1245340220,5 1245340233,6 1245340266,10 1245340278,22 1245340307,7 1245340315,28 1245340359,32 1245340395,4 1245340403,41 1245340446,46 1245340494,58 1245340554,17 1245340573,21 1245340599,3 1245340604,5 1245340611,46 1245340661,26 1245340747,4 1245340814,14 1245341043,4 1245341104,4 1245341672,4 1245341896,5 1245341906,3 1245342301,3 1245342649,6 1245342884,5 1245342929,4 1245343314,6 1245343324,10 1245343335,16 1245343353,39 1245343394,43 1245343439,62 1245343561,3 1245343790,4 1245344115,3 1245344189,5 1245344233,4 1245344241,6 1245344408,12 1245344829,3 1245345090,5 1245345457,5 1245345689,4 1245346086,3 1245347112,12 1245348006,14 1245348261,10 1245348873,4 1245348892,3 1245350303,11 1245350355,4 1245350766,5 1245350931,3 1245351605,14 1245351673,55 1245351729,23 1245351754,5 1245352123,37 1245352163,21 1245352186,18 1245352209,40 1245352251,49 1245352305,8 1245352315,5 1245352321,6 1245352329,22 1245352353,48 1245352404,77 1245352483,58 1245352543,17 
1245352570,19 1245352635,5 1245352879,3 1245352899,5 1245352954,4 1245352962,6 1245352970,58 1245353031,21 1245353055,14 1245353071,52 1245353131,37 1245353170,201 1245353373,56 1245353431,18 1245353454,47 1245353502,13 1245353519,106 1245353627,10 1245353647,12 1245353660,30 1245353699,42 1245353746,28 1245353776,29 1245353806,9 1245353818,21 1245353841,10 1245353853,6 1245353862,224 1245354226,4 1245354964,63 1245355029,4 1245355036,142 1245355180,148 1245355330,7 1245355338,23 1245355363,9 1245355374,60 1245355437,142 1245355581,27 1245355609,5 1245355615,2 1245355630,64 1245355700,7 1245355709,73 1245355785,45 1245355834,85 1245355925,9 1245356234,5 1245356620,6 1245356629,12 1245356643,29 1245356676,120 1245356798,126 1245356937,62 1245357001,195 1245357210,17 1245357237,15 1245357258,24 1245357284,53 1245357339,2 1245357345,27 1245357374,76 1245357452,28 1245357482,42 1245357529,14 1245357545,35 1245357582,74 1245357661,30 1245357693,19 1245357714,38 1245357758,11 1245357777,37 1245357817,49 1245357868,19 1245357891,31 1245357931,48 1245357990,49 1245358043,24 1245358082,22 1245358108,17 1245358148,18 1245358168,7 1245358179,6 1245358186,19 1245358209,17 1245358229,5 1245358240,9 1245358252,10 1245358263,6 1245358272,9 1245358296,26 1245358328,49 1245358381,6 1245358389,38 1245358453,19 1245358476,24 1245358504,21 1245358533,76 1245358628,24 1245358653,10 1245358669,105 1245358781,20 1245358808,14 1245358836,6 1245358871,61 1245358933,0 1245358936,44 1245358982,11 1245358996,25 1245359023,15 1245359040,32 1245359076,19 1245359099,13 1245359117,16 1245359138,12 1245359161,33 1245359215,32 1245359249,14 1245359272,7 1245359314,10 1245359333,36 1245359371,21 1245359424,10 1245359447,61 1245359514,32 1245359560,42 1245359604,87 1245359700,60 1245359762,23 1245359786,4 1245359791,8 1245359803,6 1245359813,107 1245359922,29 1245359953,22 1245359978,86 1245360069,75 1245360147,22 1245360170,0 1245360184,41 1245360239,15 1245360256,34 1245360301,37 1245360339,1 1245360342,28 1245360372,20 1245360394,32 1245360440,24 1245360526,3 1245360728,3 1245361011,4 1245361026,35 1245361064,137 1245361359,5 1245362172,11 1245362225,21 1245362248,51 1245362302,20 1245362334,42 1245362418,12 1245362468,7 1245362557,9 1245362817,3 1245363175,4 1245363271,4 1245363446,3 1245363539,4 1245363573,4 1245363635,1 1245363637,3 1245363740,5 1245363875,3 1245364075,4 1245364354,14 1245364370,19 1245364391,49 1245364442,34 1245364478,23 1245364502,80 1245364633,15 1245364650,8 1245364673,16 1245364691,47 1245364739,53 1245364795,39 1245364836,25 1245365353,4 1245365640,11 1245365665,5 1245365726,8 1245365778,7 1245365982,4 1245366017,13 1245366042,6 1245366487,4 1245366493,4 1245366500,4 1245366507,3 1245366622,5 1245366690,5 1245366946,4 1245366953,16 1245366975,8 1245366996,7 1245367005,7 1245367031,6 1245367040,9 1245367051,7 1245367059,23 1245367084,76 1245367166,158 1245367740,4 1245367804,3 1245367847,4 1245367887,9 1245369300,10 1245369611,12 1245370038,10 1245370374,8 1245370668,5 1245370883,5 1245370927,7 1245370945,9 1245370961,16 1245370978,414 1245371398,135 1245371535,252 1245371791,238 1245372034,199 1245372621,4 1245372890,5 1245373043,7 1245373060,9 1245373073,6 1245373081,68 1245373151,10 1245373162,49 1245373212,79 1245373300,12 1245373313,38 1245373353,20 1245373374,59 1245373435,28 1245373465,94 1245373560,11 1245373574,53 1245373629,22 1245373654,6 1245373662,334 1245373998,169 1245374176,41 1245374219,26 1245374246,51 1245374299,31 1245374332,57 1245374391,55 1245374535,4 1245374759,7 
1245374769,200 1245374971,215 1245375188,181 1245375371,81 1245375455,59 1245375516,33 1245375552,19 1245375572,56 1245375629,220 1245375850,32 1245375884,26 1245375948,7 1245375964,114 1245376473,4 1245376810,13 1245378296,10 1245378950,12 1245379004,3 1245379569,4 1245379582,4 1245379615,6 1245380030,3 1245380211,4 1245380412,14 1245380727,4 1245380850,4
```
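
Each record is easy to unpack. For example, a few lines of Python (a sketch, assuming the log format above) convert one entry into a human-readable time and duration:

```python
import time

entry = "1245300044,5"  # one record from log.txt: start,duration
start, duration = [int(x) for x in entry.split(",")]
print time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(start)),
print "(%d seconds)" % duration
```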

__This log file__ is only 7.3 KB. At this rate (7.3 KB/day × 365 ≈ 2.6 MB), a year's worth of log data can be stored in less than 3 MB of plain text files. The data presented here can be graphed (producing the image at the top of the page) using the following code:

```python
# pySquelchGrapher.py
import numpy
import datetime
import pylab
print "loading libraries...",
print "complete"


def loadData(fname="log.txt"):
    print "loading data...",
    # load signal/duration from log file
    f = open(fname)
    raw = f.read()
    f.close()
    raw = raw.replace('\n', ' ')
    raw = raw.split(" ")
    signals = []
    for line in raw:
        if len(line) < 3:
            continue
        line = line.split(',')
        sec = datetime.datetime.fromtimestamp(int(line[0]))
        dur = int(line[1])
        signals.append([sec, dur])
    print "complete"
    return signals


def findDays(signals):
    # determine which days are in the log file
    print "finding days...",
    days = []
    for signal in signals:
        day = signal[0].date()
        if not day in days:
            days.append(day)
    print "complete"
    return days


def genMins(day):
    # generate an array for every minute in a certain day
    print "generating bins...",
    mins = []
    startTime = datetime.datetime(day.year, day.month, day.day)
    minute = datetime.timedelta(minutes=1)
    for i in xrange(60*24):  # 1,440 minutes in a day
        mins.append(startTime+minute*i)
    print "complete"
    return mins


def fillMins(mins, signals):
    print "filling bins...",
    vals = [0]*len(mins)
    dayToDo = signals[0][0].date()
    for signal in signals:
        if not signal[0].date() == dayToDo:
            continue
        sec = signal[0]
        dur = signal[1]
        prebuf = sec.second
        minOfDay = sec.hour*60+sec.minute
        if dur+prebuf < 60:  # simple case, no rollover seconds
            vals[minOfDay] = dur
        else:  # if duration exceeds the minute the signal started in
            vals[minOfDay] = 60-prebuf
            dur = dur+prebuf
            while (dur > 0):  # add rollover seconds to subsequent minutes
                minOfDay += 1
                if minOfDay >= len(vals):
                    break  # transmission ran past midnight; clip at day's end
                dur = dur-60
                if dur <= 0:
                    break
                if dur >= 60:
                    vals[minOfDay] = 60
                else:
                    vals[minOfDay] = dur
    print "complete"
    return vals


def normalize(vals):
    print "normalizing data...",
    divBy = float(max(vals))
    for i in xrange(len(vals)):
        vals[i] = vals[i]/divBy
    print "complete"
    return vals


def smoothListGaussian(list, degree=10):
    print "smoothing...",
    window = degree*2-1
    weight = numpy.array([1.0]*window)
    weightGauss = []
    for i in range(window):
        i = i-degree+1
        frac = i/float(window)
        gauss = 1/(numpy.exp((4*(frac))**2))
        weightGauss.append(gauss)
    weight = numpy.array(weightGauss)*weight
    smoothed = [0.0]*(len(list)-window)
    for i in range(len(smoothed)):
        smoothed[i] = sum(numpy.array(list[i:i+window])*weight)/sum(weight)
    while len(list) > len(smoothed)+int(window/2):
        smoothed.insert(0, smoothed[0])
    while len(list) > len(smoothed):
        smoothed.append(smoothed[-1])  # pad the tail with the last smoothed value
    print "complete"
    return smoothed


signals = loadData()
days = findDays(signals)
for day in days:
    mins = genMins(day)
    vals = normalize(fillMins(mins, signals))
    fig = pylab.figure()
    pylab.grid(alpha=.2)
    pylab.plot(mins, vals, 'k', alpha=.1)
    pylab.plot(mins, smoothListGaussian(vals), 'b', lw=1)
    pylab.axis([day, day+datetime.timedelta(days=1), None, None])
    fig.autofmt_xdate()
    pylab.title("147.120 MHz Usage for "+str(day))
    pylab.xlabel("time of day")
    pylab.ylabel("fractional usage")
    pylab.show()

```

---
title: Graphing Computer Usage
date: 2009-05-20 08:44:57
tags: python, old
---

# Graphing Computer Usage

__I enjoy writing Python scripts to analyze and display linear data.__ One of my favorite blog entries is [Linear Data Smoothing with Python](http://www.swharden.com/blog/2008-11-17-linear-data-smoothing-in-python/), developed for my [homemade electrocardiogram](http://www.swharden.com/blog/category/diy-ecg-home-made-electrocardiogram/) project. I installed a program called TimeTrack.exe on my work computer. It basically logs whenever you open or close a program. The data output looks like this:

```
"Firefox","Prototype of a Digital Biopsy Device - Mozilla Firefox","05/19/2009  9:45a","05/19/2009  9:45a","766ms","0.0"
"Firefox","Dual-Channel Mobile Surface Electromyograph - Mozilla Firefox","05/19/2009  9:46a","05/19/2009  9:46a","797ms","0.0"
"Windows Explorer","","03/24/2008  9:30a","05/19/2009  9:48a","49d 6h 9m","20.7"
"Windows Explorer","09_04_07_RA_SA_AV","05/19/2009  8:48a","05/19/2009  8:48a","1.0s","0.0"
"Windows Explorer","Image003.jpg - Windows Picture and Fax Viewer","05/18/2009  4:03p","05/18/2009  4:03p","1.2s","0.0"
```

__I have a 13 MB file containing lines like this,__ which I parse, condense, analyze, and display with Python. The script finds the first and last entry times and creates a dictionary whose keys are the hours between them; it then parses the log, determines which hour each entry belongs to, and increments that key's integer value. Something similar is repeated with respect to days rather than hours. The result is:

<div class="text-center">

[![](compusage_white_thumb.jpg)](compusage_white.png)

</div>
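
Conceptually, the hour-binning step works like this (a minimal sketch with made-up timestamps, not real TimeTrack data):

```python
import datetime

# hypothetical program open/close events
events = [datetime.datetime(2009, 5, 19, 9, 45),
          datetime.datetime(2009, 5, 19, 9, 46),
          datetime.datetime(2009, 5, 19, 11, 3)]

counts = {}  # keys are hours, values are event counts
for t in events:
    hourBin = t.replace(minute=0, second=0, microsecond=0)
    counts[hourBin] = counts.get(hourBin, 0) + 1

for hourBin in sorted(counts):
    print hourBin, counts[hourBin]
```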

The code I used to generate this graph is:

```python
# This script analyzes data exported from "TimeTrack" (a free computer usage
# monitoring program for windows) and graphs the data visually.

import time, pylab, datetime, numpy

# This is my computer usage data.  Generate yours however you want.
allHours = ['2008_10_29 0', '2009_03_11 5', '2009_04_09 5', '2008_07_04 10',
'2008_12_18 9', '2009_01_30 12', '2008_09_04 7', '2008_05_17 1',
'2008_05_11 5', '2008_11_03 3', '2008_05_21 3', '2009_02_19 11',
'2008_08_15 13', '2008_04_02 4', '2008_07_16 5', '2008_09_16 8',
'2008_04_10 5', '2009_05_10 1', '2008_12_30 4', '2008_06_07 2',
'2008_11_23 0', '2008_08_03 0', '2008_04_30 4', '2008_07_28 9',
'2008_05_19 0', '2009_03_30 7', '2008_06_19 3', '2009_01_24 3',
'2008_08_23 6', '2008_12_01 0', '2009_02_23 6', '2008_11_27 0',
'2008_05_02 5', '2008_10_20 13', '2008_03_27 5', '2009_04_02 9',
'2009_02_21 0', '2008_09_13 1', '2008_12_13 0', '2009_04_14 11',
'2009_01_31 7', '2008_11_04 10', '2008_07_09 6', '2008_10_24 10',
'2009_02_22 0', '2008_09_25 12', '2008_12_25 0', '2008_05_26 4',
'2009_05_01 10', '2009_04_26 11', '2008_08_10 8', '2008_11_08 6',
'2008_07_21 12', '2009_04_21 3', '2009_05_13 8', '2009_02_02 8',
'2008_10_07 2', '2008_06_10 6', '2008_09_21 0', '2009_03_17 9',
'2008_08_30 7', '2008_11_28 4', '2009_02_14 0', '2009_01_22 6',
'2008_10_11 0', '2008_06_22 8', '2008_12_04 0', '2008_03_28 0',
'2009_04_07 2', '2008_09_10 0', '2008_05_15 5', '2008_08_18 12',
'2008_10_31 5', '2009_03_09 7', '2009_02_25 8', '2008_07_02 4',
'2008_12_16 7', '2008_09_06 2', '2009_01_26 5', '2009_04_19 0',
'2008_07_14 13', '2008_11_01 5', '2009_01_18 0', '2009_05_04 0',
'2008_08_13 10', '2009_02_27 3', '2009_01_16 12', '2008_09_18 8',
'2009_02_03 7', '2008_06_01 0', '2008_12_28 0', '2008_07_26 0',
'2008_11_21 1', '2008_08_01 8', '2008_04_28 3', '2009_05_16 0',
'2008_06_13 5', '2008_10_02 11', '2009_03_28 6', '2008_08_21 7',
'2009_01_13 6', '2008_11_25 4', '2008_06_25 1', '2008_10_22 11',
'2008_03_25 6', '2009_02_07 6', '2008_12_11 4', '2009_01_01 4',
'2008_09_15 2', '2009_02_05 12', '2008_07_07 9', '2009_04_12 0',
'2008_04_11 5', '2008_10_26 4', '2008_05_28 3', '2008_09_27 14',
'2009_05_03 0', '2008_12_23 5', '2009_05_12 10', '2008_11_14 3',
'2008_07_19 0', '2009_04_24 8', '2008_04_07 1', '2008_08_08 11',
'2008_06_04 0', '2009_05_15 12', '2009_03_23 13', '2009_02_01 10',
'2008_09_23 11', '2009_02_08 3', '2008_08_28 4', '2008_11_18 9',
'2008_07_31 7', '2008_10_13 0', '2008_06_16 9', '2009_03_27 6',
'2008_12_02 0', '2008_05_01 7', '2009_04_05 1', '2008_08_16 9',
'2009_03_15 0', '2008_04_16 6', '2008_10_17 4', '2008_06_28 5',
'2009_01_28 10', '2008_04_18 0', '2008_12_14 0', '2008_11_07 6',
'2009_04_17 7', '2008_04_14 7', '2008_07_12 0', '2009_01_15 7',
'2009_05_06 8', '2008_12_26 0', '2008_06_03 7', '2008_09_28 0',
'2008_05_25 4', '2008_08_07 8', '2008_04_26 7', '2008_07_24 1',
'2008_04_20 0', '2008_11_11 4', '2009_04_29 0', '2008_10_04 0',
'2009_05_18 9', '2009_03_18 4', '2008_06_15 8', '2009_02_13 6',
'2008_05_04 5', '2009_03_04 2', '2009_03_06 3', '2008_05_06 0',
'2008_08_27 11', '2008_04_22 0', '2009_03_26 6', '2008_03_31 9',
'2008_06_27 5', '2008_10_08 4', '2008_09_09 4', '2008_12_09 3',
'2008_05_10 0', '2008_05_14 5', '2009_04_10 0', '2009_01_11 0',
'2008_07_05 8', '2009_01_05 7', '2008_10_28 0', '2009_02_18 11',
'2009_03_10 7', '2008_05_30 3', '2008_09_05 7', '2008_12_21 6',
'2009_03_02 6', '2008_08_14 5', '2008_11_12 5', '2008_07_17 8',
'2008_04_05 6', '2009_04_22 11', '2009_05_09 0', '2008_06_06 0',
'2009_01_03 0', '2008_09_17 6', '2009_03_21 3', '2009_02_10 7',
'2008_05_08 4', '2008_08_02 0', '2008_11_16 0', '2008_07_29 12',
'2008_10_15 5', '2008_06_18 5', '2009_03_25 2', '2009_01_10 0',
'2009_04_03 5', '2008_08_22 7', '2009_03_13 11', '2008_10_19 0',
'2008_06_30 8', '2008_09_02 9', '2008_05_23 4', '2008_12_12 7',
'2008_07_10 11', '2008_11_05 8', '2008_04_12 4', '2009_04_15 7',
'2008_12_24 1', '2008_09_30 0', '2008_05_27 2', '2008_08_05 10',
'2008_04_24 6', '2009_04_27 6', '2008_07_22 3', '2008_11_09 1',
'2008_06_09 6', '2008_10_06 14', '2009_03_16 7', '2008_05_22 5',
'2009_01_29 12', '2008_11_29 4', '2008_04_09 7', '2008_08_25 12',
'2009_02_15 0', '2008_03_29 7', '2008_06_21 7', '2008_10_10 9',
'2008_05_12 6', '2009_02_16 10', '2008_09_11 11', '2008_12_07 0',
'2008_07_03 6', '2009_04_08 3', '2009_01_23 7', '2009_01_27 5',
'2008_10_30 0', '2009_03_08 0', '2009_01_21 8', '2008_12_19 0',
'2008_05_16 2', '2009_01_25 1', '2009_02_26 5', '2008_09_07 2',
'2008_04_03 1', '2008_08_12 6', '2008_04_13 10', '2008_11_02 0',
'2008_07_15 0', '2009_04_20 3', '2009_02_24 10', '2009_05_11 8',
'2008_12_31 8', '2008_04_15 7', '2008_09_19 10', '2009_01_19 0',
'2008_11_22 3', '2008_07_27 2', '2009_02_04 7', '2009_03_31 1',
'2008_05_24 3', '2008_10_01 8', '2008_06_12 6', '2009_01_12 11',
'2008_11_26 8', '2009_04_01 10', '2009_02_28 0', '2008_08_20 6',
'2008_10_21 10', '2008_06_24 4', '2008_03_26 4', '2008_12_10 0',
'2008_09_12 0', '2008_05_09 7', '2009_02_17 7', '2008_07_08 6',
'2008_10_25 5', '2009_04_13 9', '2009_05_02 0', '2008_12_22 8',
'2008_09_24 9', '2009_01_20 5', '2008_11_15 6', '2009_04_25 10',
'2008_08_11 9', '2008_04_06 8', '2008_07_20 1', '2009_03_22 3',
'2008_06_11 6', '2008_09_20 3', '2009_05_14 10', '2008_11_19 0',
'2008_08_31 2', '2009_02_09 8', '2008_10_12 0', '2008_04_25 5',
'2008_06_23 4', '2009_01_07 8', '2008_08_19 0', '2008_12_05 2',
'2008_07_01 8', '2008_10_16 6', '2009_04_06 3', '2009_03_14 5',
'2008_09_01 2', '2008_12_17 14', '2008_05_18 7', '2008_04_01 2',
'2009_04_18 0', '2008_04_17 0', '2008_07_13 0', '2008_06_02 10',
'2008_09_29 6', '2008_12_29 0', '2009_05_05 8', '2008_04_19 0',
'2009_04_30 8', '2008_08_06 4', '2008_11_20 0', '2008_07_25 6',
'2009_02_06 6', '2009_03_29 3', '2009_05_17 0', '2009_03_19 7',
'2008_10_03 1', '2008_06_14 3', '2008_05_07 5', '2008_08_26 3',
'2008_11_24 9', '2008_04_21 8', '2008_04_23 4', '2008_10_23 11',
'2008_06_26 4', '2008_03_24 8', '2008_12_08 5', '2008_09_14 2',
'2009_01_02 6', '2008_04_08 0', '2008_10_27 6', '2009_04_11 0',
'2008_07_06 0', '2008_12_20 3', '2009_04_23 6', '2008_09_26 9',
'2008_05_31 0', '2008_07_18 4', '2008_11_13 6', '2008_08_09 2',
'2008_04_04 0', '2009_03_20 5', '2008_09_22 7', '2009_05_08 9',
'2008_06_05 7', '2008_07_30 7', '2008_11_17 10', '2008_05_03 0',
'2008_08_29 3', '2009_02_11 12', '2009_01_08 8', '2008_06_17 0',
'2008_10_14 7', '2009_03_24 11', '2008_08_17 6', '2008_12_03 0',
'2009_01_09 4', '2008_05_29 5', '2008_06_29 9', '2008_10_18 5',
'2009_04_04 0', '2008_12_15 10', '2009_03_12 0', '2009_03_05 7',
'2008_05_20 4', '2008_09_03 7', '2009_03_07 8', '2009_01_14 6',
'2008_05_05 5', '2008_11_06 7', '2008_07_11 6', '2009_04_16 9',
'2009_02_20 0', '2008_12_27 0', '2009_01_17 0', '2009_05_07 7',
'2008_11_10 5', '2008_07_23 11', '2009_04_28 0', '2008_04_27 2',
'2008_08_04 0', '2009_03_01 11', '2008_10_05 0', '2008_06_08 8',
'2009_05_19 5', '2008_04_29 4', '2008_11_30 0', '2009_01_06 8',
'2009_02_12 3', '2008_08_24 2', '2009_03_03 10', '2008_10_09 6',
'2008_06_20 2', '2008_05_13 10', '2008_12_06 0', '2008_03_30 7']

def genTimes():
    ## opens  exported timetrack data (CSV) and re-saves a compressed version.
    print "ANALYZING..."
    f=open('timetrack.txt')
    raw=f.readlines()
    f.close()
    times=["05/15/2009 12:00am"] #start time
    for line in raw[1:]:
        if not line.count('","') == 5: continue
        test = line.strip("n")[1:-1].split('","')[-3].replace("  "," ")+"m"
        test = test.replace(" 0:"," 12:")
        times.append(test) #end time
        test = line.strip("n")[1:-1].split('","')[-4].replace("  "," ")+"m"
        test = test.replace(" 0:"," 12:")
        times.append(test) #start time

    times.sort()
    print "WRITING..."
    f=open('times.txt','w')
    f.write(str(times))
    f.close()

def loadTimes():
    ## loads the times from the compressed file.
    f=open("times.txt")
    times = eval(f.read())
    newtimes=[]
    f.close()
    for i in range(len(times)):
        if "s" in times[i]: print times[i]
        newtimes.append(datetime.datetime(*time.strptime(times[i],
                                        "%m/%d/%Y %I:%M%p")[0:5]))
        #if i>1000: break #for debugging
    newtimes.sort()
    return newtimes

def linearize(times):
    ## does all the big math to calculate hours per day.
    for i in range(len(times)):
        times[i]=times[i]-datetime.timedelta(minutes=times[i].minute,
                                             seconds=times[i].second)
    hr = datetime.timedelta(hours=1)
    pos = times[0]-hr
    counts = {}
    days = {}
    lasthr=pos
    lastday=None
    while pos < times[-1]:  # (reconstructed: this loop was garbled in the original)
        daypos = pos - datetime.timedelta(hours=pos.hour)  # midnight of pos's day
        counts[pos] = times.count(pos)  # events that fall in this hour
        if counts[pos] > 1: counts[pos] = 1 #flatten
        if not daypos in days: days[daypos] = 0
        if not lasthr == pos:
            if counts[pos] > 0:
                days[daypos] = days[daypos]+1
                lasthr = pos
        pos += hr
    return days #[counts,days]

def genHours(days):
    ## outputs the hours per day as a file.
    out=""
    for day in days:
        print day
        out+="%s %in"%(day.strftime("%Y_%m_%d"),days[day])
    f=open('hours.txt','w')
    f.write(out)
    f.close()
    return

def smoothListGaussian(list,degree=7):
    ## (from an article I wrote) - Google "linear data smoothing with python".
    firstlen=len(list)
    window=degree*2-1
    weight=numpy.array([1.0]*window)
    weightGauss=[]
    for i in range(window):
        i = i-degree+1
        frac = i/float(window)
        gauss = 1/(numpy.exp((4*(frac))**2))
        weightGauss.append(gauss)
    weight = numpy.array(weightGauss)*weight
    smoothed = [0.0]*(len(list)-window)
    for i in range(len(smoothed)):
        smoothed[i] = sum(numpy.array(list[i:i+window])*weight)/sum(weight)
    pad_before = [smoothed[0]]*((firstlen-len(smoothed))/2)
    pad_after  = [smoothed[-1]]*((firstlen-len(smoothed))/2+1)
    return pad_before+smoothed+pad_after

### IF YOU USE MY DATA, YOU ONLY NEED THE FOLLOWING CODE ###

def graphIt():
    ## Graph the data!
    #f=open('hours.txt')
    #data=f.readlines()
    #f.close()  # only needed when loading hours.txt instead of allHours
    data=allHours
    data.sort()
    days,hours=[],[]
    for i in range(len(data)):
        day = data[i].split(" ")
        if int(day[1])<4: continue  # skip days with fewer than 4 logged hours
        days.append(datetime.datetime.strptime(day[0], "%Y_%m_%d"))
        hours.append(int(day[1]))
    fig=pylab.figure(figsize=(14,5))
    pylab.plot(days,smoothListGaussian(hours,1),'.',color='.5',label="single day")
    pylab.plot(days,smoothListGaussian(hours,1),'-',color='.8')
    pylab.plot(days,smoothListGaussian(hours,7),color='b',label="7-day gaussian average")
    pylab.axhline(8,color='k',ls=":")
    pylab.title("Computer Usage at Work")
    pylab.ylabel("hours (rounded)")
    pylab.legend()
    pylab.show()
    return

#times = genTimes()
#genHours(linearize(loadTimes()))
graphIt()
```
---
title: Analyzing my Writings with Python
date: 2009-01-29
---

# Analyzing my Writings with Python

I had some free time in lab today (between steps of an immunohistochemistry experiment) so I decided to further investigate the field of bioinformatics. I was curious whether it might be worth seeking a PhD in bioinformatics if I don't get into dental school. UCF offers a PhD in bioinformatics, but it's a new and small department (I think there are only 4 faculty). A degree in bioinformatics combines molecular biology (DNA, proteins, etc.), computer science (programming), and statistics.

I came across a paper today: Structural Alignment of Pseudoknotted RNA, which is a good example of the practice of bioinformatics. Think about what goes on in a cell... the sequence of a gene (a short region of DNA) is copied (letter-by-letter) onto an RNA molecule. The RNA molecule is later read by an enzyme (called a ribosome) and converted into a protein based on its sequence. (This process is the central dogma of molecular biology.) Traditionally, it was believed that RNA molecules' only function was to copy gene sequences from DNA to ribosomes, but recently (in the last several years) it was discovered that some small RNA molecules are never read and turned into proteins, but rather serve their own unique functions! For example, some RNA molecules (siRNAs) can actually turn genes on and off, and have been associated with cancer development and other immune diseases. Given the human genome (the ~3 billion letter long sequence of all of our DNA), how can we determine which regions form these functional RNA molecules that don't get converted into proteins? The paper I mentioned earlier addresses this. An algorithm was developed and used to test regions of DNA and predict their probability of forming small RNA molecules. Spikes in this trace (figure 7 of the paper) represent areas of the DNA which are likely to form these RNA molecules. (Is this useful? What if you were to compare these results between a normal person and someone with cancer?)

After reading the article I considered how similar my current programming projects are with this one (e.g., my recent DIY ECG projects). The paper shows a trace of score (likelihood that a region of DNA forms an RNA molecule) where peaks represent likely locations of RNA formation. Just generate the trace, determine the positions of the peaks, and you're golden. How similar is this to the work I've been doing with my homemade ECG machine, where I perform signal analysis to eliminate electrical noise and then analyze the resulting trace to isolate and identify peaks corresponding to heartbeats?

I got the itch to write my own string-analysis program. It reads the content of my website (exported from a SQL query), splits it up by date, and analyzes it. Ultimately I want to track the usage of certain words, but for now I wrote a script which plots the number of words I wrote.

Pretty cool huh? Check out all those spikes between 2004 and 2005! Not only are they numerous (meaning many posts), but they're also high (meaning many words per post). As you can see by the top trace, the most significant contribution to my site occurred during this time. So, let's zoom in on it.

Here is the code I used to produce this image.

"""Convert SQL backups of my WordPress blog into charts"""

import datetime, pylab, numpy

class blogChrono():
    baseUrl="http://www.SWHarden.com/blog"
    posts=[]
    dates=[]
    def __init__(self,fname):
        self.fname=fname
        self.load()
    def load(self):
        print "loading [%s]..."%self.fname,
        f=open(self.fname)
        raw=f.readlines()
        f.close()
        for line in raw:
            if "INSERT INTO" in line
            and';' in line[-2:-1]
            and " 'post'," in line[-20:-1]:
                post={}
                line=line.split("VALUES(",1)[1][:-3]
                line=line.replace(', NULL',', None')
                line=line.replace(", '',",", None,")
                line=line.replace("''","")
                c= line.split(',',4)[4][::-1]
                c= c.split(" ,",21)
                text=c[-1]
                text=text[::-1]
                text=text[2:-1]
                text=text.replace('"""','###')
                line=line.replace(text,'blogtext')
                line=line.replace(', ,',', None,')
                line=eval("["+line+"]")
                if len(line[4])&gt;len('blogtext'):
                    x=str(line[4].split(', '))[2:-2]
                    raw=str(line)
                    raw=raw.replace(line[4],x)
                    line=eval(raw)
                post["id"]=int(line[0])
                post["date"]=datetime.datetime.strptime(line[2],
                                                        "%Y-%m-%d %H:%M:%S")
                post["text"]=eval('"""'+text+' """')
                post["title"]=line[5]
                post["url"]=line[21]
                post["comm"]=int(line[25])
                post["words"]=post["text"].count(" ")
                self.dates.append(post["date"])
                self.posts.append(post)
        self.dates.sort()
        d=self.dates[:]
        i,newposts=0,[]
        while len(self.posts)&gt;0:
            die=min(self.dates)
            for post in self.posts:
                if post["date"]==die:
                    self.dates.remove(die)
                    newposts.append(post)
                    self.posts.remove(post)
        self.posts,self.dates=newposts,d
        print "read %d posts!n"%len(self.posts)

#d=blogChrono('sml.sql')
d=blogChrono('test.sql')

fig=pylab.figure(figsize=(7,5))
dates,lengths,words,ltot,wtot=[],[],[],[0],[0]
for post in d.posts:
    dates.append(post["date"])
    lengths.append(len(post["text"]))
    ltot.append(ltot[-1]+lengths[-1])
    words.append(post["words"])
    wtot.append(wtot[-1]+words[-1])
ltot,wtot=ltot[1:],wtot[1:]

pylab.subplot(211)
#pylab.plot(dates,numpy.array(ltot)/(10.0**6),label="letters")
pylab.plot(dates,numpy.array(wtot)/(10.0**3),label="words")
pylab.ylabel("Thousand")
pylab.title("Total Blogged Words")
pylab.grid(alpha=.2)
#pylab.legend()
fig.autofmt_xdate()
pylab.subplot(212,sharex=pylab.subplot(211))
pylab.bar(dates,numpy.array(words)/(10.0**3))
pylab.title("Words Per Entry")
pylab.ylabel("Thousand")
pylab.xlabel("Date")
pylab.grid(alpha=.2)
#pylab.axis([min(d.dates),max(d.dates),None,20])
fig.autofmt_xdate()
pylab.subplots_adjust(left=.1,bottom=.13,right=.98,top=.92,hspace=.25)
width=675
pylab.savefig('out.png',dpi=675/7)
pylab.show()

print "DONE"

I wrote a Python script to analyze the word frequency of the blogs in my website (extracted from an SQL query WordPress backup file) for frequency, then I took the list to Wordie and created a word jumble. Neat, huh?

import datetime, pylab, numpy
f=open('dump.txt')
body=f.read()
f.close()
body=body.lower()
body=body.split(" ")
tot=float(len(body))
words={}
for word in body:
    for i in word:
        if 65&lt; =ord(i)&lt;=90 or 97&lt;=ord(i)&lt;=122: pass
        else: word=None
    if word:
        if not word in words:words[word]=0
        words[word]=words[word]+1
data=[]
for word in words: data.append([words[word],word])
data.sort()
data.reverse()
out= "&lt;b&gt;Out of %d words...n"%tot
xs=[]
for i in range(1000):
    d=data[i]
    out += '&lt;b&gt;"%s"&lt;/b&gt; ranks #%d used &lt;b&gt;%d&lt;/b&gt; times (%.05f%%)
n'%
                (d[1],i+1,d[0],d[0]/tot)
f=open("dump.html",'w')
f.write(out)
f.close()
print "DONE"&lt;/b&gt;

Markdown source code last modified on January 18th, 2021
---
title: Analyzing my Writings with Python
date: 2009-01-29 18:46:22
tags: python, old
---

# Analyzing my Writings with Python

__I had some free time in lab today__ (between steps of an immunohistochemistry experiment) so I decided to further investigate the field of bioinformatics. I was curious whether it may be worth seeking a PhD in bioinformatics if I don't get into dental school. UCF offers a PhD in bioinformatics, but it's a new and small department (I think there are only 4 faculty). A degree in bioinformatics combines molecular biology (DNA, proteins, etc.), computer science (programming), and statistics.

__I came across a paper today__: [Structural Alignment of Pseudoknotted RNA](http://cseweb.ucsd.edu/users/shzhang/app/RECOMB2005_pseudoknot.pdf), which is a good example of the practice of bioinformatics. Think about what goes on in a cell... the sequence of a gene (a short region of DNA) is copied (letter-by-letter) onto an RNA molecule. The RNA molecule is later read by an enzyme (called a ribosome) and converted into a protein based on its sequence. (This process is the central dogma of molecular biology.) Traditionally, it was believed that RNA molecules' only function was to copy gene sequences from DNA to ribosomes, but recently (in the last several years) it was discovered that some small RNA molecules are never read and turned into proteins, but rather serve their own unique functions! For example, some RNA molecules (siRNAs) can actually turn genes on and off, and have been associated with cancer development and other immune diseases. Given the human genome (the ~3 billion letter long sequence of all of our DNA), how can we determine which regions form these functional RNA molecules which don't get converted into proteins? The paper I mentioned earlier addresses this. An algorithm was developed and used to test regions of DNA and predict their probability of forming small RNA molecules. Spikes in this trace (figure 7 of the paper) represent areas of the DNA which are likely to form these RNA molecules. (Is this useful? What if you were to compare these results between a normal person and someone with cancer?)

__After reading the article__ I considered how similar my current programming projects are to this one (e.g., my recent DIY ECG projects). The paper shows a trace of score (likelihood that a region of DNA forms an RNA molecule) where peaks represent likely locations of RNA formation. Just generate the trace, determine the positions of the peaks, and you're golden. How similar is this to the work I've been doing with my homemade ECG machine, where I perform signal analysis to eliminate electrical noise and then analyze the resulting trace to isolate and identify peaks corresponding to heartbeats?

__I got the itch to write my own string-analysis program.__ It reads the content of my website (exported from a SQL query), splits it up by date, and analyzes it. Ultimately I want to track the usage of certain words, but for now I wrote a script which plots the number of words I wrote.

<div class="text-center">

[![](blog_words_thumb.jpg)](blog_words.png)

</div>

__Pretty cool, huh?__ Check out all those spikes between 2004 and 2005! Not only are they numerous (meaning many posts), but they're also high (meaning many words per post). As you can see by the top trace, the most significant contribution to my site occurred during this time. So, let's zoom in on it.

<div class="text-center">

[![](blog_words_zoom_thumb.jpg)](blog_words_zoom.png)

</div>

__Here is the code__ I used to produce this image.

```python
"""Convert SQL backups of my WordPress blog into charts"""

import datetime, pylab, numpy

class blogChrono():
    baseUrl="http://www.SWHarden.com/blog"
    posts=[]
    dates=[]
    def __init__(self,fname):
        self.fname=fname
        self.load()
    def load(self):
        print "loading [%s]..."%self.fname,
        f=open(self.fname)
        raw=f.readlines()
        f.close()
        for line in raw:
            if "INSERT INTO" in line
            and';' in line[-2:-1]
            and " 'post'," in line[-20:-1]:
                post={}
                line=line.split("VALUES(",1)[1][:-3]
                line=line.replace(', NULL',', None')
                line=line.replace(", '',",", None,")
                line=line.replace("''","")
                c= line.split(',',4)[4][::-1]
                c= c.split(" ,",21)
                text=c[-1]
                text=text[::-1]
                text=text[2:-1]
                text=text.replace('"""','###')
                line=line.replace(text,'blogtext')
                line=line.replace(', ,',', None,')
                line=eval("["+line+"]")
                if len(line[4])&gt;len('blogtext'):
                    x=str(line[4].split(', '))[2:-2]
                    raw=str(line)
                    raw=raw.replace(line[4],x)
                    line=eval(raw)
                post["id"]=int(line[0])
                post["date"]=datetime.datetime.strptime(line[2],
                                                        "%Y-%m-%d %H:%M:%S")
                post["text"]=eval('"""'+text+' """')
                post["title"]=line[5]
                post["url"]=line[21]
                post["comm"]=int(line[25])
                post["words"]=post["text"].count(" ")
                self.dates.append(post["date"])
                self.posts.append(post)
        self.dates.sort()
        d=self.dates[:]
        i,newposts=0,[]
        while len(self.posts)>0: # rebuild the post list in chronological order
            die=min(self.dates)
            for post in self.posts:
                if post["date"]==die:
                    self.dates.remove(die)
                    newposts.append(post)
                    self.posts.remove(post)
        self.posts,self.dates=newposts,d
        print "read %d posts!n"%len(self.posts)

#d=blogChrono('sml.sql')
d=blogChrono('test.sql')

fig=pylab.figure(figsize=(7,5))
dates,lengths,words,ltot,wtot=[],[],[],[0],[0]
for post in d.posts:
    dates.append(post["date"])
    lengths.append(len(post["text"]))
    ltot.append(ltot[-1]+lengths[-1])
    words.append(post["words"])
    wtot.append(wtot[-1]+words[-1])
ltot,wtot=ltot[1:],wtot[1:]

pylab.subplot(211)
#pylab.plot(dates,numpy.array(ltot)/(10.0**6),label="letters")
pylab.plot(dates,numpy.array(wtot)/(10.0**3),label="words")
pylab.ylabel("Thousand")
pylab.title("Total Blogged Words")
pylab.grid(alpha=.2)
#pylab.legend()
fig.autofmt_xdate()
pylab.subplot(212,sharex=pylab.subplot(211))
pylab.bar(dates,numpy.array(words)/(10.0**3))
pylab.title("Words Per Entry")
pylab.ylabel("Thousand")
pylab.xlabel("Date")
pylab.grid(alpha=.2)
#pylab.axis([min(d.dates),max(d.dates),None,20])
fig.autofmt_xdate()
pylab.subplots_adjust(left=.1,bottom=.13,right=.98,top=.92,hspace=.25)
width=675
pylab.savefig('out.png',dpi=675/7)
pylab.show()

print "DONE"
```

__I wrote a Python script to analyze the word frequency__ of the posts on my website (extracted from a WordPress SQL backup file), then I took the list to [Wordie](http://www.wordle.net/) and created a word jumble. Neat, huh?

```python
import datetime, pylab, numpy
f=open('dump.txt')
body=f.read()
f.close()
body=body.lower()
body=body.split(" ")
tot=float(len(body))
words={}
for word in body:
    for i in word:
        if 65&lt; =ord(i)&lt;=90 or 97&lt;=ord(i)&lt;=122: pass
        else: word=None
    if word:
        if not word in words:words[word]=0
        words[word]=words[word]+1
data=[]
for word in words: data.append([words[word],word])
data.sort()
data.reverse()
out= "&lt;b&gt;Out of %d words...n"%tot
xs=[]
for i in range(1000):
    d=data[i]
    out += '<b>"%s"</b> ranks #%d used <b>%d</b> times (%.05f%%)\n' % \
           (d[1],i+1,d[0],d[0]/tot)
f=open("dump.html",'w')
f.write(out)
f.close()
print "DONE"&lt;/b&gt;
```
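
As an aside (my own sketch, not part of the original post), `collections.Counter` in modern Python can produce roughly the same ranking in a few lines, assuming the same `dump.txt` input:

```python
import re
from collections import Counter

with open('dump.txt') as f:
    # keep purely alphabetic runs, lowercased (close to the original filter)
    words = re.findall(r"[a-z]+", f.read().lower())

total = float(len(words))
for rank, (word, n) in enumerate(Counter(words).most_common(1000), 1):
    print('"%s" ranks #%d, used %d times (%.05f%%)' % (word, rank, n, n / total * 100))
```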

<div class="text-center">

[![](wordie2_thumb.jpg)](wordie2.png)

</div>

Markdown source code last modified on January 18th, 2021
---
title: Signal Filtering with Python
date: 2009-01-21 16:25:04
tags: diyECG, python, old
---

# Signal Filtering with Python

> **⚠️ SEE UPDATED POST:** [**Signal Filtering in Python**](https://swharden.com/blog/2020-09-23-signal-filtering-in-python/)

I've been spending a lot of time creating [DIY ECGs](http://www.swharden.com/blog/category/diy-ecg-home-made-electrocardiogram/) which produce fairly noisy signals. I have researched ways to clean up these signals, and the results are very useful! I document some of these findings here.

<div class="text-center">

[![](filtering_thumb.jpg)](filtering.png)

</div>

__This example shows how I take a noisy recording and turn it into a smooth trace.__ This is achieved by eliminating excess high-frequency components which are in the original recording due to electromagnetic noise. A major source of noise can be from the AC passing through wires traveling through the walls of my apartment. My [original ECG circuit](http://www.swharden.com/blog/images/opampecg.gif) was highly susceptible to this kind of interference, but my [improved ECG circuit](http://www.swharden.com/blog/images/bigsch.gif) eliminates much of this noise. However, noise is still in the trace and it needs to be removed.

__One method of reducing noise uses the FFT (Fast Fourier Transformation) and its inverse (iFFT) algorithm__. Let's say you have a trace with repeating sine-wave noise. The output of the FFT is the breakdown of the signal by frequency. Check out [this FFT trace of a noisy signal](http://www.swharden.com/blog/images/diy_ecg4.png) from a few posts ago. High peaks represent frequencies which are common. See the enormous peak around 60 Hz? That's noise from AC power lines. Other peaks (shown in colored bands) are other electromagnetic noise sources, such as wireless networks, TVs, telephones, and maybe my computer. The heart produces changes in electricity that are very slow (a heartbeat is about 1 Hz), so if we can eliminate higher-frequency sine waves we can get a pretty clear trace. This is called a band-stop filter (we block out certain bands of frequencies). A band-pass filter is the opposite, where we only allow frequencies which are below (low-pass) or above (high-pass) a given frequency. By eliminating each of the peaks in the colored regions (setting each value to 0), then performing an inverse fast Fourier transformation (going backwards from frequency back to time), the result is the signal trace (the light gray trace on the bottom graph) with those high-frequency sine waves removed! A little touch-up smoothing makes a great trace (the black trace on the bottom graph).
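
To make the band-stop idea concrete, here is a minimal numpy sketch (my own illustration, not code from this post; the function name and parameters are made up for the example) that zeroes the FFT bins between 50 and 70 Hz and inverts back to the time domain:

```python
import numpy as np

def bandstop(signal, rate, f_low=50, f_high=70):
    """Zero the FFT bins between f_low and f_high (Hz), then invert to time."""
    spectrum = np.fft.rfft(signal)
    freqs = np.fft.rfftfreq(len(signal), d=1.0 / rate)
    spectrum[(freqs >= f_low) & (freqs <= f_high)] = 0  # block the band
    return np.fft.irfft(spectrum, n=len(signal))

rate = 1000                                  # samples per second
t = np.arange(0, 2, 1.0 / rate)
heartbeat = np.sin(2 * np.pi * 1 * t)        # slow ~1 Hz "heartbeat"
mains = 0.5 * np.sin(2 * np.pi * 60 * t)     # 60 Hz power-line hum
cleaned = bandstop(heartbeat + mains, rate)  # hum removed, heartbeat kept
```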

__Here's some Python code you may find useful.__ The image below is the output of the Python code at the bottom of this entry. This Python script requires that [test.wav](http://www.SWHarden.com/tmp/test.wav) (an actual ECG recording of my heartbeat) exist in the same folder.

<div class="text-center img-border">

[![](sig_thumb.jpg)](sig.png)

</div>

*   (A) The original signal we want to isolate. (IE: our actual heart signal)
*   (B) Some electrical noise. (3 sine waves of different amplitudes and periods)
*   (C) Electrical noise (what happens when you add those 3 sine waves together)
*   (D) Static (random noise generated by a random number generator)
*   (E) Signal (A) plus static (D)
*   (F) Signal (A) plus static (D) plus electrical noise (C)
*   (G) Total FFT trace of (F). Note the low frequency peak due to the signal and electrical noise (near 0) and the high frequency peak due to static (near 10,000)
*   (H) This is a zoomed-in region of (F) showing 4 peaks (one for the original signal and 3 for high frequency noise). By blocking-out (set it to 0) everything above 10Hz (red), we isolate the peak we want (signal). This is a low-pass filter.
*   (I) Performing an inverse FFT (iFFT) on the low-pass-filtered FFT, we get a nice trace which is our original signal!
*   (J) Comparison of our iFFT with our original signal shows that the amplitude is kinda messed up. If we normalize each of these (set minimum to 0, maximum to 1) they line up. Awesome!
*   (K) How close were we? Graphing the difference of iFFT and the original signal shows that usually we're not far off. The ends are a problem though, but if our data analysis trims off these ends then our center looks great.

*   _Note: these ends can be fixed by applying a windowing function to the original data. The FFT works best if the input data starts and ends at zero._
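
For example, here is a minimal sketch of that windowing idea (my own illustration, not code from the post): a Hann window tapers both ends of the data to zero before the FFT.

```python
import numpy

data = numpy.random.random(1000) - .5    # stand-in for a recording
window = numpy.hanning(len(data))        # smoothly tapers to 0 at both ends
spectrum = numpy.fft.fft(data * window)  # FFT of the windowed recording
```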

```python
import numpy, scipy, pylab, random

# This script demonstrates how to use band-pass (low-pass)
# filtering to eliminate electrical noise and static
# from signal data!

##################
### PROCESSING ###
##################

xs=numpy.arange(1,100,.01) # generate Xs (1.00, 1.01, 1.02, ..., 99.99)
signal=numpy.sin(xs*.3) # (A) the original signal we want to recover
sin1=numpy.sin(xs) # (B) sin1
sin2=numpy.sin(xs*2.33)*.333 # (B) sin2
sin3=numpy.sin(xs*2.77)*.777 # (B) sin3
noise=sin1+sin2+sin3 # (C)
static = (numpy.random.random_sample((len(xs)))-.5)*.2 # (D)
sigstat=static+signal # (E)
rawsignal=sigstat+noise # (F)
fft=scipy.fft(rawsignal) # (G) and (H)
bp=fft.copy() # copy the spectrum so zeroing bins doesn't alter fft itself
for i in range(len(bp)): # (H-red)
    if i>=10: bp[i]=0 # zero every bin above #10 (a crude low-pass filter)
ibp=scipy.ifft(bp) # (I), (J), (K) and (L)

################
### GRAPHING ###
################

h,w=6,2
pylab.figure(figsize=(12,9))
pylab.subplots_adjust(hspace=.7)

pylab.subplot(h,w,1);pylab.title("(A) Original Signal")
pylab.plot(xs,signal)

pylab.subplot(h,w,3);pylab.title("(B) Electrical Noise Sources (3 Sine Waves)")
pylab.plot(xs,sin1,label="sin1")
pylab.plot(xs,sin2,label="sin2")
pylab.plot(xs,sin3,label="sin3")
pylab.legend()

pylab.subplot(h,w,5);pylab.title("(C) Electrical Noise (3 sine waves added together)")
pylab.plot(xs,noise)

pylab.subplot(h,w,7);pylab.title("(D) Static (random noise)")
pylab.plot(xs,static)
pylab.axis([None,None,-1,1])

pylab.subplot(h,w,9);pylab.title("(E) Signal + Static")
pylab.plot(xs,sigstat)

pylab.subplot(h,w,11);pylab.title("(F) Recording (Signal + Static + Electrical Noise)")
pylab.plot(xs,rawsignal)

pylab.subplot(h,w,2);pylab.title("(G) FFT of Recording")
fft=scipy.fft(rawsignal)
pylab.plot(abs(fft))
pylab.text(200,3000,"signals",verticalalignment='top')
pylab.text(9500,3000,"static",verticalalignment='top',
        horizontalalignment='right')

pylab.subplot(h,w,4);pylab.title("(H) Low-Pass FFT")
pylab.plot(abs(fft))
pylab.text(17,3000,"sin1",verticalalignment='top',horizontalalignment='left')
pylab.text(37,2000,"sin2",verticalalignment='top',horizontalalignment='center')
pylab.text(45,3000,"sin3",verticalalignment='top',horizontalalignment='left')
pylab.text(6,3000,"signal",verticalalignment='top',horizontalalignment='left')
pylab.axvspan(10,10000,fc='r',alpha=.5)
pylab.axis([0,60,None,None])

pylab.subplot(h,w,6);pylab.title("(I) Inverse FFT")
pylab.plot(ibp)

pylab.subplot(h,w,8);pylab.title("(J) Signal vs. iFFT")
pylab.plot(signal,'k',label="signal",alpha=.5)
pylab.plot(ibp,'b',label="ifft",alpha=.5)

pylab.subplot(h,w,10);pylab.title("(K) Normalized Signal vs. iFFT")
pylab.plot(signal/max(signal),'k',label="signal",alpha=.5)
pylab.plot(ibp/max(ibp),'b',label="ifft",alpha=.5)

pylab.subplot(h,w,12);pylab.title("(L) Difference / Error")
pylab.plot(signal/max(signal)-ibp/max(ibp),'k')

pylab.savefig("SIG.png",dpi=200)
pylab.show()
```

Markdown source code last modified on January 18th, 2021
---
title: DIY ECG Trial 1
date: 2009-01-15 00:39:45
tags: diyECG, python, old
---

# DIY ECG Trial 1

> **⚠️ Check out my newer ECG designs:** 
* [**Sound Card ECG with AD8232**](https://swharden.com/blog/2019-03-15-sound-card-ecg-with-ad8232/)
* [**Single op-amp ECG**](https://swharden.com/blog/2016-08-08-diy-ecg-with-1-op-amp/)

__I've succeeded in building my own electrocardiograph (ECG) to record the electrical activity of my own heart!__ Briefly, I built a micropotential amplifier using an op-amp and attached it to makeshift electrodes on my chest (pennies and shampoo), fed the amplified signal into my sound card, and recorded it as a WAV. The signal is very noisy, though. I was able to do a great job of removing this noise using band/frequency filters in GoldWave (audio editing software designed to handle WAV files). I band-blocked 50-70 Hz (which removed the oscillations from the 60 Hz AC lines running around my apartment). I then wrote the Python code (at the bottom of this entry) to load this WAV file as a single list of numbers (voltage potentials). I performed a data condensation algorithm (converting 100 points of raw WAV data into a single, averaged point, lessening my processing load by 100x), followed by two consecutive moving window averages (20-point window, performed on the condensed data). The result was a voltage reading that had most of the noise removed, and a beautiful ECG signal emerged! I also tossed in some code to determine the peak of the R wave, label it (yellow dot), and use the inverse R-R time distance to calculate heart rate.
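
The 100-to-1 condensation step is easy to picture. Here is a compact numpy sketch of the same idea (my own illustration; the `shrink()` function in the code below does it with a loop):

```python
import numpy

raw = numpy.random.randn(100000)  # stand-in for raw WAV samples
deg = 100                         # condense every 100 points into one
condensed = raw[:len(raw) // deg * deg].reshape(-1, deg).mean(axis=1)
print(len(raw), "->", len(condensed))  # 100000 -> 1000
```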

<div class="text-center">

[![](diy_ecg2_thumb.jpg)](diy_ecg2.png)

</div>

__This is my actual ECG signal__ as recorded by a circuit similar to the one in the previous entry, recorded through my sound card, and processed with the Python script below. You can start to see the Q, R, S, and T components. I can't wait to solder-up a prototype (it's currently breadboarded) and try to analyze hours of data rather than just a few seconds. I'll take pictures of this device soon.

<div class="text-center">

[![](diy_ecg1_thumb.jpg)](diy_ecg1.png)

</div>

__And here's the code I used:__ note that it relies on the WAV file I recorded. This code has extra functions not required to produce the image above, but I left them in, in case they may be useful.

```python
import wave, struct, numpy, pylab, scipy

def readwave(wavfilename):
    """load raw data directly from a WAV file."""
    global rate
    w=wave.open(wavfilename,'rb')
    (nchannel, width, rate, length, comptype, compname) = w.getparams()
    print "[%s] %d HZ (%0.2fsec)" %(wavfilename, rate, length/float(rate))
    frames = w.readframes(length)
    return numpy.array(struct.unpack("%sh" %length*nchannel,frames))

def shrink(data,deg=100):
    """condense a linear data array by a multiple of [deg]."""
    global rate
    small=[]
    print "starting with", len(data)
    for i in range(len(data)/deg):
        small.append(numpy.average(data[i*deg:(i+1)*deg]))
    print "ending with", len(small)
    rate = rate/deg
    return small

def normalize(data):
    """make all data fit between -.5 and +.5"""
    data=data-numpy.average(data)
    big=float(max(data))
    sml=float(min(data))
    data=data/abs(big-sml)
    data=data+float(abs(min(data)))-.47
    return data

def smooth(data,deg=20):
    """moving window average (deg = window size)."""
    for i in range(len(data)-deg):
        if i==0: cur,smooth=sum(data[0:deg]),[]
        smooth.append(cur/deg)
        cur=cur-data[i]+data[i+deg]
    return smooth

def makeabs(data):
    """center linear data to its average value."""
    for i in range(len(data)): data[i]=abs(data[i])
    return data

def invert(data):
    """obviously."""
    for i in range(len(data)): data[i]=-data[i]
    return data

def loadwav(fname='./wavs/bandpassed.wav'):
    """a do-everything function to get usable, smoothed data from a WAV."""
    wav=readwave(fname)
    wav=shrink(wav)
    wav=invert(wav)
    wav=smooth(wav)
    wav=smooth(wav,10)
    wav=normalize(wav)
    Xs=getXs(wav)
    return Xs,wav

def getXs(datalen):
    """calculate time positions based on WAV frequency resolution."""
    Xs=[]
    for i in range(len(datalen)):
        Xs.append(i*(1/float(rate)))
    print len(datalen), len(Xs)
    return Xs

def integrate(data):
    """integrate the function with respect to its order."""
    inte=[]
    for i in range(len(data)-1):
        inte.append(abs(data[i]-data[i+1]))
    inte.append(inte[-1])
    return inte

def getPoints(Xs,data,res=10):
    """return X,Y coordinates of R peaks and calculate R-R based heartrate."""
    pXs,pYs,pHRs=[],[],[]
    for i in range(res,len(data)-res):
        if data[i]&gt;data[i-res]+.1 and data[i]&gt;data[i+res]+.1:
            if data[i]&gt;data[i-1] and data[i]&gt;data[i+1]:
                pXs.append(Xs[i])
                pYs.append(data[i])
                if len(pXs)&gt;1:
                    pHRs.append((1.0/(pXs[-1]-pXs[-2]))*60.0)
    pHRs.append(pHRs[-1])
    return pXs,pYs,pHRs

Xs,Ys=loadwav()
px,py,pHR=getPoints(Xs,Ys)

pylab.figure(figsize=(12,6))
pylab.subplot(2,1,1)
#pylab.axhline(color='.4',linestyle=':')
pylab.plot(Xs,Ys,'b-')
pylab.plot(px,py,'y.')
pylab.axis([None,None,-.6,.6])
pylab.title("DIY Electrocardiogram - Trial 1",fontsize=26)
pylab.ylabel("Normalized Potential",fontsize=16)
#pylab.xlabel("Time (sec)")
ax=pylab.axis()
pylab.subplot(2,1,2)
pylab.plot(px,pHR,'k:')
pylab.plot(px,pHR,'b.')
pylab.axis([ax[0],ax[1],None,None])
pylab.ylabel("Heart Rate (BPM)",fontsize=16)
pylab.xlabel("Time (seconds)",fontsize=16)
pylab.savefig("test.png",dpi=120)
pylab.show()
print "DONE"
```
