## Copyright (C) 2009 Edoardo Pasca
##
## This program parses an RSS or Atom feed and reformats it for pdfLaTeX
##
## This program is free software; you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
## the Free Software Foundation; either version 2 of the License, or
## (at your option) any later version.
##
## This program is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
## GNU General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with this program; If not, see <http://www.gnu.org/licenses/>.

# -*- coding: cp1252 -*-

import codecs
import datetime
import locale
import os
import re
import time

import feedparser

# Replacement table used by the 'rimedia' codec error handler.
# Every key must be a single unicode character.  U+2019 (right single
# quotation mark) is mapped to a plain ASCII apostrophe.
_rimedi = {u'\u2019': u"'"}


def rimedia(exc):
    """Codec error handler that substitutes known unencodable characters.

    The slice of the input that the codec could not handle is looked up,
    character by character, in ``_rimedi``.

    Args:
        exc: the exception object handed over by the codec machinery.

    Returns:
        A ``(replacement, resume_position)`` tuple, which is the shape the
        codecs error-handler protocol requires.

    Raises:
        The received exception itself, when it is not an encode/translate
        error or when any offending character has no entry in ``_rimedi``.
    """
    if isinstance(exc, (UnicodeEncodeError, UnicodeTranslateError)):
        erore = exc.object[exc.start:exc.end]
        # A codec may report a run of several bad characters at once, so
        # the whole run is accepted only if every character is known.
        if erore and all(char in _rimedi for char in erore):
            replacement = u''.join(_rimedi[char] for char in erore)
            return (replacement, exc.end)
    raise exc


codecs.register_error('rimedia', rimedia)

# Open 'filename.txt' for writing as ISO-8859-15; characters the codec
# cannot encode are passed to the 'rimedia' error handler.
# NOTE(review): nothing in the visible script writes to or closes this
# handle -- confirm it is still needed.
outf = codecs.open(
    'filename.txt',
    mode='w',
    encoding='iso-8859-15',
    errors='rimedia',
)

# Result of locale.getdefaultlocale(), unpacked into two module-level names.
# NOTE(review): neither name is read anywhere in the visible script.
(language, output_encoding) = locale.getdefaultlocale()

def remove_html_tags(data):
    """Return *data* with every ``<...>`` span removed.

    The match is non-greedy, so each span ends at the first ``>`` that
    follows a ``<``.  A span is only matched when both delimiters sit on
    the same line, because ``.`` does not match a newline here.
    """
    return re.sub(r'<.*?>', '', data)

def remove_html_special_char(data):
    """Return *data* with HTML character references deleted.

    Three forms are recognised: named (``&nbsp;``), decimal (``&#232;``)
    and hexadecimal (``&#xE8;``).  The references are dropped, not
    translated to the characters they stand for.

    Only text that has the shape of a character reference is removed.  A
    bare ``&`` followed later by an unrelated ``;`` (for example
    ``"AT&T rocks; really"``) is left untouched instead of having
    everything in between deleted.
    """
    entity = re.compile(
        r'&(?:#[0-9]+|#[xX][0-9A-Fa-f]+|[A-Za-z][A-Za-z0-9]*);'
    )
    return entity.sub('', data)

def substitute_u2029_char(data):
    """Return *data* with a textual unicode escape turned into an apostrophe.

    The text that gets replaced is the six-character sequence made of a
    backslash followed by ``u2019``; each occurrence becomes ``'``.
    """
    # NOTE(review): this matches the escape written out as text, not the
    # U+2019 character itself, and the function name says 2029 while the
    # pattern says 2019 -- confirm which was intended.
    return re.sub(r'\\u2019', "'", data)

def br2dobleLine(data):
    """Return *data* with HTML line breaks replaced by a blank line.

    Every ``<br>`` element becomes two newline characters.  All common
    spellings are recognised: ``<br>``, ``<br/>``, ``<br />``, upper-case
    variants, and tags that carry attributes.
    """
    # \b keeps longer tag names that merely start with "br" from matching.
    line_break = re.compile(r'<br\b[^>]*>', re.IGNORECASE)
    return line_break.sub('\n\n', data)

# Alternative feed, kept for reference:
#d = feedparser.parse("http://voglioscendere.ilcannocchiale.it/blogs/feeds/blogrss20.aspx?blogid=32495")
# Parse the feed at this URL; the result is read later through d.entries.
d = feedparser.parse("http://antefatto.ilcannocchiale.it/blogs/feeds/blogrss20.aspx?blogid=96578")

# Parenthesised single-argument form: prints the same text under Python 2
# and is also valid Python 3 syntax.
print("feed parsed")
##print e.title
##print desc

#os.chdir ("/Users/paskino/temp/")

# LaTeX preamble written at the top of the generated document.
# Raw strings hold the LaTeX backslashes verbatim, so no command depends
# on Python leaving an unrecognised escape sequence alone (\d, \s, \h, \p)
# or needs hand-doubling to dodge a real one (\t, \a, \b, \v).  The
# resulting text is the same as before.
header = r"""\documentclass[iliad,12pt,oneside,onecolumn,final,openany]{iliad}
\usepackage[latin1]{inputenc}
\usepackage[italian]{babel}
\usepackage{hyperref}   
\setlength{\hoffset}{-0.8 in}
%remove
\setlength{\voffset}{-1 in}
\setlength{\textwidth}{\paperwidth}
\addtolength{\textwidth}{-9mm}
\setlength{\textheight}{\paperheight}
\addtolength{\textheight}{-22mm}
\title{Voglioscendere}
\begin{document}
\tableofcontents
"""
# Closing line appended after the last section.
footer = r"\end{document}"
# Assemble the LaTeX document: preamble, one \section per recent post,
# closing line.  The result is written to prova.tex.
e = []                  # entries that were selected for the document
sections = [header]     # document fragments, joined once at the end

actualday = time.localtime()
today_ordinal = datetime.date(*actualday[:3]).toordinal()
for entry in d.entries:
    posted = getattr(entry, 'date_parsed', None)
    if posted is None:
        # Without a parsed date the age cannot be computed; skip the entry
        # instead of aborting the whole run.
        continue
    # Calendar-day difference taken from full dates, so the window stays
    # correct across a year boundary (a plain tm_yday subtraction does not).
    # NOTE(review): actualday is local time; confirm which time zone
    # feedparser uses for date_parsed.
    age_in_days = today_ordinal - datetime.date(*posted[:3]).toordinal()
    # Keep posts dated at most two calendar days before today.
    if age_in_days <= 2:
        e.append(entry)
        # Use the entry itself: its position in the feed is not its
        # position in the filtered list e.
        desc = remove_html_special_char(
            remove_html_tags(br2dobleLine(entry.description)))
        news = "\\section{" + entry.title + "}" + desc
        sections.append(news)
sections.append(footer)
section = "".join(sections)

# The codec writer encodes on the way out (characters that ISO-8859-1
# cannot represent are dropped) and the with-block closes the file even
# if the write fails.
with codecs.open("prova.tex", "w", "ISO-8859-1", "ignore") as f:
    f.write(section)

