---
title: Animated Realtime Spectrograph with Scrolling Waterfall Display in Python
date: 2010-03-05 22:51:21
tags: python, old
---

# Animated Realtime Spectrograph with Scrolling Waterfall Display in Python

__My project is coming along nicely.__ This isn't an incredibly robust spectrograph program, but it sure gets the job done quickly and easily. The code below will produce a real-time scrolling spectrograph entirely with Python! It polls the microphone (or default recording device), should work on any OS, and can be adjusted for vertical resolution / FFT frequency resolution. It has some simple functions for filtering (check out the de-trend filter!) and might serve as a good start to a spectrograph / frequency analysis project. It took me a long time to reach this point! I've worked with Python before, and dabbled with the Python Imaging Library (PIL), but this is my first experience with real-time linear data analysis and high-demand multi-threading. I hope it helps you. Below are screenshots of the program (two running at the same time) listening to the same radio signals (mostly Morse code) with standard output and with the "de-trending filter" activated.

<div class="text-center img-border">

[![](spectrogram-scrollbars_thumb.jpg)](spectrogram-scrollbars.png)

</div>

<div class="text-center img-border img-small">

[![](nofilter_thumb.jpg)](nofilter.png)
[![](filter_thumb.jpg)](filter.png)

</div>

```python
import pyaudio
import scipy
import struct
import scipy.fftpack

from Tkinter import *
import threading
import time
import datetime
import wckgraph
import math

import Image
import ImageTk
from PIL import ImageOps
from PIL import ImageChops
import time
import random
import threading
import scipy

# ADJUST RESOLUTION OF VERTICAL FFT
bufferSize = 2**11
# bufferSize=2**8

# ADJUSTS AVERAGING SPEED NOT VERTICAL RESOLUTION
# REDUCE HERE IF YOUR PC CANT KEEP UP
sampleRate = 24000
# sampleRate=64000

p = pyaudio.PyAudio()
chunks = []
ffts = []


def stream():
    global chunks, inStream, bufferSize
    while True:
        chunks.append(inStream.read(bufferSize))


def record():
    global w, inStream, p, bufferSize
    inStream = p.open(format=pyaudio.paInt16, channels=1,
                      rate=sampleRate, input=True, frames_per_buffer=bufferSize)
    threading.Thread(target=stream).start()
    # stream()


def downSample(fftx, ffty, degree=10):
    x, y = [], []
    for i in range(len(ffty)/degree-1):
        x.append(fftx[i*degree+degree/2])
        y.append(sum(ffty[i*degree:(i+1)*degree])/degree)
    return [x, y]


def smoothWindow(fftx, ffty, degree=10):
    lx, ly = fftx[degree:-degree], []
    for i in range(degree, len(ffty)-degree):
        ly.append(sum(ffty[i-degree:i+degree]))
    return [lx, ly]


def smoothMemory(ffty, degree=3):
    global ffts
    ffts = ffts+[ffty]
    if len(ffts) <= degree:
        return ffty
    ffts = ffts[1:]
    return scipy.average(scipy.array(ffts), 0)


def detrend(fftx, ffty, degree=10):
    lx, ly = fftx[degree:-degree], []
    for i in range(degree, len(ffty)-degree):
        ly.append((ffty[i]-sum(ffty[i-degree:i+degree])/(degree*2))*2+128)
        # ly.append(fft[i]-(ffty[i-degree]+ffty[i+degree])/2)
    return [lx, ly]


def graph():
    global chunks, bufferSize, fftx, ffty, w
    if len(chunks) > 0:
        data = chunks.pop(0)
        data = scipy.array(struct.unpack("%dB" % (bufferSize*2), data))
        ffty = scipy.fftpack.fft(data)
        fftx = scipy.fftpack.rfftfreq(bufferSize*2, 1.0/sampleRate)
        fftx = fftx[0:len(fftx)/4]
        ffty = abs(ffty[0:len(ffty)/2])/1000
        ffty1 = ffty[:len(ffty)/2]
        ffty2 = ffty[len(ffty)/2::]+2
        ffty2 = ffty2[::-1]
        ffty = ffty1+ffty2
        ffty = (scipy.log(ffty)-1)*120
        fftx, ffty = downSample(fftx, ffty, 2)
        updatePic(fftx, ffty)
        reloadPic()

    if len(chunks) > 20:
        print "falling behind...", len(chunks)


def go(x=None):
    global w, fftx, ffty
    print "STARTING!"
    threading.Thread(target=record).start()
    while True:
        # record()
        graph()


def updatePic(datax, data):
    global im, iwidth, iheight
    strip = Image.new("L", (1, iheight))
    if len(data) > iheight:
        data = data[:iheight-1]
    # print "MAX FREQ:",datax[-1]
    strip.putdata(data)
    # print "%03d, %03d" % (max(data[-100:]), min(data[-100:]))
    im.paste(strip, (iwidth-1, 0))
    im = im.offset(-1, 0)
    root.update()


def reloadPic():
    global im, lab
    lab.image = ImageTk.PhotoImage(im)
    lab.config(image=lab.image)


root = Tk()
im = Image.open('./ramp.tif')
im = im.convert("L")
iwidth, iheight = im.size
im = im.crop((0, 0, 500, 480))
# im=Image.new("L",(100,1024))
iwidth, iheight = im.size
root.geometry('%dx%d' % (iwidth, iheight))
lab = Label(root)
lab.place(x=0, y=0, width=iwidth, height=iheight)
go()
```
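
As a quick sanity check of the de-trend idea, here is a tiny standalone example (made-up numbers, reusing the `detrend()` function from the listing above, not actual audio): a steadily rising baseline comes out essentially flat, so only local peaks survive.

```python
# Standalone demo of detrend() on made-up numbers (not audio data): a pure
# rising ramp should come out essentially flat after de-trending.

def detrend(fftx, ffty, degree=10):  # same function as in the listing above
    lx, ly = fftx[degree:-degree], []
    for i in range(degree, len(ffty)-degree):
        ly.append((ffty[i]-sum(ffty[i-degree:i+degree])/(degree*2))*2+128)
    return [lx, ly]

ramp = [2.0*i for i in range(100)]                                  # rising baseline, no peaks
spiky = [v+(50 if i % 25 == 0 else 0) for i, v in enumerate(ramp)]  # baseline + spikes
x1, flat = detrend(range(100), ramp)
x2, peaks = detrend(range(100), spiky)
print min(flat), max(flat)    # both ~130: the ramp itself is flattened away
print min(peaks), max(peaks)  # the spikes still stand well above the baseline
```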

__UPDATE: I'm not going to post the code for this yet__ (it's very messy) but I got this thing to display a spectrograph on a canvas. What's the advantage of that? Huge, massive spectrographs (thousands of pixels in all directions) can now be browsed in real time using scrollbars, and when you scroll it doesn't stop recording, and you don't lose any data! Super cool.
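
The general idea, though, is simple enough to sketch. The snippet below is only an illustration of the concept (it is not the withheld program): each new column is pasted onto an ever-wider PIL image, the canvas item is refreshed, and the scrollregion is extended so old data stays reachable with the scrollbar while new columns keep arriving.

```python
# Rough illustration only (NOT the withheld program): grow a spectrograph on a
# scrollable Tk canvas by pasting each new column into an ever-wider PIL image.
from Tkinter import *
import Image, ImageTk
import random

root = Tk()
canv = Canvas(root, width=500, height=480)
sbar = Scrollbar(root, orient=HORIZONTAL, command=canv.xview)
canv.config(xscrollcommand=sbar.set)
sbar.pack(side=BOTTOM, fill=X)
canv.pack(side=TOP, expand=YES, fill=BOTH)

im = Image.new("L", (1, 480))
imgtag = canv.create_image(0, 0, anchor="nw")

def addColumn(columnData):
    # naive approach: rebuild the whole image each time (slow but simple)
    global im
    strip = Image.new("L", (1, 480))
    strip.putdata(columnData)
    wider = Image.new("L", (im.size[0]+1, 480))
    wider.paste(im, (0, 0))
    wider.paste(strip, (im.size[0], 0))
    im = wider
    canv.photo = ImageTk.PhotoImage(im)   # keep a reference or Tk drops it
    canv.itemconfig(imgtag, image=canv.photo)
    canv.config(scrollregion=(0, 0, im.size[0], 480))

for i in range(600):                      # fake columns stand in for FFT data
    addColumn([random.randint(0, 255) for j in range(480)])
    root.update()
root.mainloop()
```

Rebuilding the whole image every column is wasteful; a real version would paste into a pre-allocated image or grow it in large steps.
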
---
title: Display large Images with Scrollbars with Python, Tk, and PIL
date: 2010-03-03 19:56:55
tags: python, old
---

# Display large Images with Scrollbars with Python, Tk, and PIL

__I wrote a program to display extremely large images in Python using Tk.__ It's interesting how simple this program is, yet frustrating how long it took me to figure it out.

<div class="text-center img-border">

[![](specview_thumb.jpg)](specview.png)

</div>

__This little Python program__ will load an image (pretty much any format) using the Python Imaging Library (PIL, which must be installed) and let you view it on a canvas that scrolls in two directions with Tkinter and ImageTk. The above screenshot is of the program viewing the image below:

<div class="text-center img-border large">

[![](1hr_original_thumb.jpg)](1hr_original.jpg)

</div>

__What is that image?__ I won't get ahead of myself, but it's about 5 kHz of audio from 10.140 MHz, which includes a popular QRSS calling frequency. The image displays an hour of data. My ultimate goal is to have it scroll in the Tk window, with slide-adjustable brightness/contrast/etc. (a rough sketch of that adjustment idea follows the code below).

```python
from Tkinter import *
import Image, ImageTk

class ScrolledCanvas(Frame):
     def __init__(self, parent=None):
          Frame.__init__(self, parent)
          self.master.title("Spectrogram Viewer")
          self.pack(expand=YES, fill=BOTH)
          canv = Canvas(self, relief=SUNKEN)
          canv.config(width=400, height=200)
          canv.config(highlightthickness=0)

          sbarV = Scrollbar(self, orient=VERTICAL)
          sbarH = Scrollbar(self, orient=HORIZONTAL)

          sbarV.config(command=canv.yview)
          sbarH.config(command=canv.xview)

          canv.config(yscrollcommand=sbarV.set)
          canv.config(xscrollcommand=sbarH.set)

          sbarV.pack(side=RIGHT, fill=Y)
          sbarH.pack(side=BOTTOM, fill=X)

          canv.pack(side=LEFT, expand=YES, fill=BOTH)
          self.im=Image.open("./1hr_original.jpg")
          width,height=self.im.size
          canv.config(scrollregion=(0,0,width,height))
          self.im2=ImageTk.PhotoImage(self.im)
          self.imgtag=canv.create_image(0,0,anchor="nw",image=self.im2)

ScrolledCanvas().mainloop()
```
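
Toward that slide-adjustable brightness goal, here is a rough sketch of one way it could work (an illustration only, not code from this post): a Tk Scale re-renders the image through PIL's ImageEnhance module each time the slider moves.

```python
# Rough sketch of the slide-adjustable brightness idea (illustration only).
# Assumes the same 1hr_original.jpg used by the viewer above.
from Tkinter import *
import Image, ImageTk, ImageEnhance

root = Tk()
root.title("Brightness Test")
original = Image.open("./1hr_original.jpg").convert("L")
lab = Label(root)
lab.pack(expand=YES, fill=BOTH)

def setBrightness(val):
    # re-render the image at the requested brightness factor (1.0 = unchanged)
    adjusted = ImageEnhance.Brightness(original).enhance(float(val))
    lab.photo = ImageTk.PhotoImage(adjusted)  # keep a reference or Tk drops it
    lab.config(image=lab.photo)

scale = Scale(root, from_=0.2, to=3.0, resolution=0.1, orient=HORIZONTAL,
              label="brightness", command=setBrightness)
scale.set(1.0)
scale.pack(side=BOTTOM, fill=X)
root.mainloop()
```

Swapping `ImageEnhance.Contrast` in for `Brightness` would give a contrast slider the same way; wiring this into the scrolled canvas above (instead of a plain Label) is the obvious next step.
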
---
title: Convert Text to CW Morse Code with Linux
date: 2010-02-02 10:58:54
tags: amateur radio, python, old
---

# Convert Text to CW Morse Code with Linux

__I wanted a way to have a bunch of Morse code mp3s on my mp3 player (with a WPM/speed that I decide),__ and I found an easy way to do it with Linux. Rather than downloading existing mp3s of boring text, I wanted to be able to turn ANY text into Morse code, so I could copy something interesting (perhaps the news? hackaday? bash.org?). It's a little devious, but my plan is to practice copying Morse code during class when lectures become monotonous. \[The guy who teaches about infectious diseases is the most boring person I ever met, I learn nothing from class, and on top of that he doesn't allow laptops to be out!\] So, here's what I did in case it helps anyone else out there...

### Step 1: Get the Required Programs

Make sure you have installed [Python](http://www.Python.org), [cwtext](http://cwtext.sourceforge.net/), and [lame](http://lame.sourceforge.net/). Now you're ready to roll!

### Step 2: Prepare the Text to Encode

I went to Wikipedia and copy/pasted an ENTIRE article into a text file called in.txt. Don't worry about special characters (such as ", \*, and \#); we'll fix them with the following python script.

```python
import os
import time
f = open("out.txt")
raw = f.read()
f.close()

cmd = """echo "TEST" | cwpcm -w 7 | """
cmd += """lame -r -m m -b 8 --resample 8 -q9 - - > text.mp3"""

i = 0
for chunk in raw.split("\n")[5:]:
    if chunk.count(" ") > 50:
        i += 1
        print "\n\nfile", i, chunk.count(" "), "words\n"
        do = cmd.replace("TEST", chunk).replace("text", "%02d" % i)
        print "running:", do,
        time.sleep(1)
        print "\n\nSTART ...",
        os.system(do)
        print "DONE"
```
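
For reference, here is a rough sketch of a simple cleanup approach (an illustration, not the original script): keep only letters, numbers, and a little punctuation from in.txt and write the result to out.txt.

```python
# Rough sketch of a cleanup step (illustration only): keep letters, numbers,
# and basic punctuation from in.txt and write the result to out.txt.
import string

allowed = string.letters + string.digits + " .,?/=\n"

f = open("in.txt")
raw = f.read()
f.close()

clean = "".join([c if c in allowed else " " for c in raw])

f = open("out.txt", "w")
f.write(clean)
f.close()
```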

### Step 3: Generate Morse Code Audio

There should be a new file, out.txt, which is cleaned-up nicely. Run the following script to turn every paragraph of text with more than 50 words into an mp3 file...

```python
import os
f = open("out.txt")
raw = f.read()
f.close()
cmd = """echo "TEST" | cwpcm -w 13 | sox -r 44k -u -b 8 -t raw - text.wav"""
cmd += """; lame --preset phone text.wav text.mp3; rm text.wav"""
i = 0
for chunk in raw.split("\n")[5:]:
    if chunk.count(" ") > 50:
        i += 1
        print i, chunk.count(" "), "words"
        os.system(cmd.replace("TEST", chunk).replace("text", "%02d" % i))
```

Now you should have a directory filled with mp3 files which you can skip through (or shuffle!) using your handy dandy mp3 player. Note that "-w 13" means 13 WPM (words per minute). Simply change that number to change the speed.

Good luck with your CW practice!

---
title: PySquelch: A Python-Based Frequency Audio Activity Monitor
date: 2009-07-26 00:22:12
tags: python, old
---

# PySquelch: A Python-Based Frequency Audio Activity Monitor

__I'm pretty much done with this project so it's time to formally document it.__  This project is a collaboration between Fred, [KJ4LFJ](http://www.qrz.com/kj4lfj), who supplied the hardware, and me, Scott, [KJ4LDF](http://www.qrz.com/kj4ldf), who supplied the software.  Briefly, a scanner is set to a single frequency (147.120 MHz, the output of an [active repeater](http://www.147120.com/) in Orlando, FL) and the audio output is fed into the microphone jack of a PC sound card.  The pySquelch scripts (run in the order they appear) detect audio activity, log the data, and display that data graphically.
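
The detect-and-log idea boils down to something like the sketch below (a simplified illustration only, not the actual pySquelch scripts, which are linked below): once a minute, append a timestamp and the number of "loud" seconds to a log file.

```python
# Simplified illustration of the detect-and-log idea (not the actual pySquelch
# scripts): once a minute, append a timestamp and the number of "loud" seconds
# to a log file.
import pyaudio, numpy, time

RATE = 8000          # samples per second of scanner audio
THRESHOLD = 80       # tune to your sound card / squelch level

p = pyaudio.PyAudio()
stream = p.open(format=pyaudio.paInt16, channels=1, rate=RATE,
                input=True, frames_per_buffer=RATE)

while True:
    loudSeconds = 0
    for sec in range(60):
        data = numpy.fromstring(stream.read(RATE), dtype=numpy.int16)
        data = numpy.absolute(data - numpy.average(data))   # center and rectify
        if numpy.average(data) > THRESHOLD:
            loudSeconds += 1
    f = open("activity.log", "a")
    f.write("%d %d\n" % (time.time(), loudSeconds))
    f.close()
```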

Here is some sample output:

<div class="text-center">

[![](test_24hr-1_thumb.jpg)](test_24hr-1.png)
[![](test_average_thumb.jpg)](test_average.png)
[![](test_alltime-1_thumb.jpg)](test_alltime-1.png)
[![](test_60min_thumb.jpg)](test_60min.png)

</div>

__Live-running software is currently available at: [Fred's Site](http://kj4lfj.dyndns.org/147120/stream-data/pySquelch.html)__. The most current code can be found in its working directory.  For archival purposes, I'll provide [the code for pySquelch in ZIP format](http://www.SWHarden.com/blog/images/pysquelch.zip).  Now, onto other things...
---
title: Reading PCM Audio with Python
date: 2009-06-19 09:08:33
tags: python, old
---

# Reading PCM Audio with Python

__When I figured this out__ I realized it was simply way too easy and way too helpful to keep to myself.  Here I post (for the benefit of friends, family, and random Googlers alike) two examples of super-simplistic ways to read [PCM](http://en.wikipedia.org/wiki/Pulse-code_modulation) data from Python using [Numpy](http://numpy.scipy.org/) to handle the data and [Matplotlib](http://matplotlib.sourceforge.net/) to display it.  First, get some junk audio in PCM format (test.pcm).

```python
import numpy
data = numpy.memmap("test.pcm", dtype='h', mode='r')
print "VALUES:",data
```

__This code prints the values of the PCM file.__ Output is similar to:

```
VALUES: [-115 -129 -130 ...,  -72  -72  -72]
```

__To graph this data, use matplotlib like so:__

```python
import numpy, pylab
data = numpy.memmap("test.pcm", dtype='h', mode='r')
print data
pylab.plot(data)
pylab.show()
```

__This will produce a graph that looks like this:__

<div class="text-center">

[![](audiograph_thumb.jpg)](audiograph.png)

</div>

__Could it have been ANY easier?__ I'm so in love with python I could cry right now.  With the powerful tools Numpy provides to rapidly and efficiently analyze large arrays (PCM potential values) combined with the easy-to-use graphing tools Matplotlib provides, I'd say you can get well on your way to analyzing PCM audio for your project in no time.  Good luck!

__FOR MORE INFORMATION AND CODE__ check out:
* [Linear Data Smoothing In Python](http://www.swharden.com/blog/2008-11-17-linear-data-smoothing-in-python/)
* [Signal Filtering With Python](http://www.swharden.com/blog/2009-01-21-signal-filtering-with-python/)
* [Circuits Vs. Software](http://www.swharden.com/blog/2009-01-15-circuits-vs-software/)
* [DIY ECG](http://www.swharden.com/blog/category/diy-ecg-home-made-electrocardiogram/) category of entries.

__Let's get fancy and use this concept to determine the number of seconds in a 1-minute PCM file in which a radio transmission occurs.__  I was given a 1-minute PCM file with a ~45 second transmission in the middle.  Here's the graph of the result of the code posted below it.  (Detailed descriptions are at the bottom)

<div class="text-center">

[![](secpermin_thumb.jpg)](secpermin.png)

</div>

__Figure description:__ The top trace (light blue) is the absolute value of the raw sound trace from the PCM file.  The solid black line is the average (per second) of the raw audio trace.  The horizontal dotted line represents the _threshold_, a value I selected.  If the average volume for a second is above the threshold, that second is considered "transmission" (1); if it's below the threshold it's "silent" (0).  By graphing these 60 values in bar graph form (bottom window) we get a good idea of when the transmission starts and ends.  Note that ALL the graphing steps are for demonstration purposes only, and all the math can be done in the 1st half of the code.  Graphing may be useful when determining the optimal threshold though.  Even when the radio is silent, the microphone is a little noisy.  The optimal threshold is one which would consider microphone noise as silent, but consider a silent radio transmission as a transmission.

```python
### THIS CODE DETERMINES THE NUMBER OF SECONDS OF TRANSMISSION
### FROM A 60 SECOND PCM FILE (MAKE SURE PCM IS 60 SEC LONG!)
import numpy
threshold=80 # set this to suit your audio levels
dataY=numpy.memmap("test.pcm", dtype='h', mode='r') #read PCM
dataY=dataY-numpy.average(dataY) #shift the sound vertically so the average is at 0
dataY=numpy.absolute(dataY) #no negative values
valsPerSec=float(len(dataY)/60) #assume audio is 60 seconds long
dataX=numpy.arange(len(dataY))/(valsPerSec) #time axis from 0 to 60
secY,secX,secA=[],[],[]
for sec in xrange(60):
    secData=dataY[valsPerSec*sec:valsPerSec*(sec+1)]
    val=numpy.average(secData)
    secY.append(val)
    secX.append(sec)
    if val>threshold: secA.append(1)
    else: secA.append(0)
print "%d sec of 60 used = %0.02f"%(sum(secA),sum(secA)/60.0)
raw_input("press ENTER to graph this junk...")

### CODE FROM HERE IS ONLY USED TO GRAPH THE DATA
### IT MAY BE USEFUL FOR DETERMINING OPTIMAL THRESHOLD
import pylab
ax=pylab.subplot(211)
pylab.title("PCM Data Fitted to 60 Sec")
pylab.plot(dataX,dataY,'b',alpha=.5,label="sound")
pylab.axhline(threshold,color='k',ls=":",label="threshold")
pylab.plot(secX,secY,'k',label="average/sec",alpha=.5)
pylab.legend()
pylab.grid(alpha=.2)
pylab.axis([None,None,-1000,10000])
pylab.subplot(212,sharex=ax)
pylab.title("Activity (Yes/No) per Second")
pylab.grid(alpha=.2)
pylab.bar(secX,secA,width=1,linewidth=0,alpha=.8)
pylab.axis([None,None,-0.5,1.5])
pylab.show()
```

__The output of this code:__

```
46 sec of 60 used = 0.77
```