Difference between revisions of "User:Ypolius"

From eLinux.org

Revision as of 12:24, 26 April 2010

I am a senior Computer Science student who has elected to take this course in order to get more familiar with Linux on the kernel side. I am very comfortable with the Linux environment and would love to learn how it works. Currently I am working on the audio mbox project.


Project Work 3/30/2010: So far I have a Python script that runs on the BeagleBoard. It takes a topic as text and speaks the intro of that topic's Wikipedia page using flite. The script is:


   # -*- coding: utf-8 -*-
   import commands
   import HTMLParser
   import re
   # pulls section headings and paragraph text out of the rendered wiki page
   class CustomParser(HTMLParser.HTMLParser):
       section = 'intro'
       reading = ""
       dictionary = {}
       def __init__(self, *args, **kwargs):
           HTMLParser.HTMLParser.__init__(self)
           self.stack = []
       def handle_starttag(self,  tag,  attrs):
           if tag.lower() == 'p':
               self.reading = 'text'
           if tag.lower() == 'h2':
               if(self.section == 'intro'):
                   self.dictionary[self.section]="".join(self.stack)
                   self.stack = []
               if(self.section != 'Contents' and self.section != 'intro'):
                   self.dictionary[self.section]= "".join(self.stack)
                   self.stack = []
               self.reading = 'title'
               self.section = ""
           if(self.section == 'Contents'):
               if(tag.lower() == 'span'):
                   self.reading = 'text'
       def handle_endtag(self,  tag):
           if tag.lower() == 'p':
               self.reading = ""
           if tag.lower() == 'h2':
               self.reading = ""
               if(self.section == 'Contents'):
                   self.dictionary['Contents'] = []
           if tag.lower() == 'table':  
               if self.section == 'Contents':
                   self.reading = ""  
               if self.section == 'intro':                
                   self.reading = 'text'
           if tag.lower() == 'span':            
               if self.section == 'Contents':
                   self.reading = ""  
                   self.dictionary[self.section].append("".join(self.stack))
                   self.stack = []
       def handle_data(self,  data):
           if self.reading == 'text':
               self.stack.append(data)
           if self.reading == 'title':
               self.section += data
   topic = raw_input('Enter a wikipedia topic you would like to hear about\n').replace(' ',  '+')
   # quote the URL so the shell does not treat '&' as a background operator
   commands.getstatusoutput('wget --output-document=result.html "http://en.wikipedia.org/wiki/Special:Search?search=' + topic + '&go=Go"')
   parser = CustomParser()
   html = open('result.html',  'r')
   parser.feed(html.read())
   html.close()
   parser.close()
   #cleanup files
   for k, v in parser.dictionary.iteritems():
       if k != 'Contents':
           # strip footnote markers like [1] from the section text
           parser.dictionary[k] = re.sub('\[[0-9]\]', '', v)
   commands.getstatusoutput('mkdir output')
   output = open('output/intro.txt',  'w')
   #start with intro
   output.write('intro\n')
   output.write(parser.dictionary['intro'])
   output.close()
   for k in range(len(parser.dictionary['Contents'])):
       # the Contents list alternates TOC numbers and titles; the titles sit at odd indices
       if k % 2 != 0:
           try:    
               section = parser.dictionary['Contents'][k]
               parser.dictionary[section]  # raises KeyError if this section was never parsed
               output = open('output/' + section + '.txt', 'w')
               output.write(section + '\n')
               output.write(parser.dictionary[section])
           except KeyError:
               print 'no section: ' + section
           output.close()
   #commands.getstatusoutput('flite output.txt speech.wav')
   #commands.getstatusoutput('aplay speech.wav')
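
The flite and aplay calls are left commented out above. As a rough sketch of that playback step (assuming flite and aplay are installed on the board, e.g. via opkg), a loop along these lines should speak each generated text file in turn:

   # playback sketch: speak every text file the script above wrote into output/
   # assumes the 'flite' and 'aplay' binaries are present on the board
   import commands
   import glob
   for textfile in sorted(glob.glob('output/*.txt')):
       wavfile = textfile.replace('.txt', '.wav')
       # quote the file names, since section titles can contain spaces
       commands.getstatusoutput('flite "' + textfile + '" "' + wavfile + '"')
       commands.getstatusoutput('aplay "' + wavfile + '"')

Keeping one wave file per section also makes it easy to replay a single section later without re-running the synthesis.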



The Python package can be installed with the 'opkg install python' command. Since this script depends on HTMLParser and the on-board Python installation is minimal, you need a few extra standard-library modules: HTMLParser.py, markupbase.py, and htmlenvprefs.py (unsure of the name, something similar). They can be taken from a more fleshed-out install; I copied mine over with scp from an Arch Linux Python installation and it worked fine.
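
A quick way to see which of these modules a minimal install is still missing (before copying anything over) is a check along these lines, run with the board's python; markupbase is pulled in indirectly when HTMLParser is imported, and the uncertain third file mentioned above can be added to the list once its name is confirmed:

   # rough dependency check for the script above
   for name in ['commands', 'HTMLParser', 'markupbase', 're']:
       try:
           __import__(name)
           print name + ': present'
       except ImportError:
           print name + ': missing - copy the .py file over (e.g. with scp)'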