#!/usr/bin/python
#(c) Holger Blasum holgerlists att blasum.net 2005-05-15
#LICENSE: GPL, version 0.01
#for raw output from scanadf
#contact tom att martoneconsulting.com for other raw tools
#adapt: Width (here: 2552), Height (here: 3507) to your own
#scanner output (mine is a Fujitsu M3093).
#usage: python raw2pdf.py image1 image2 ... > out.pdf
#eg "ls image-* | xargs python raw2pdf.py > out.pdf"

import sys,string
#drop the script name; every remaining argument is taken as an input file
sys.argv.pop(0)
data = []	#contents of each input file, one entry per PDF page
objs = []	#every PDF object created so far, in creation order
for a in sys.argv:
	#binary mode: the image data must be read unchanged; text mode may
	#translate line endings on some platforms and corrupt it
	f = open(a, 'rb')
	try:
		data.append(f.read())
	finally:
		#close the handle even when read() fails
		f.close()

xref = []	#not referenced by the visible code; kept for compatibility
bcount = 0	#number of characters written to stdout so far

#register offset (bcount) while printing
def bprint(s):
	"""Write s to stdout and add its length to the global bcount.

	s is formatted with "%s", so both plain strings and objects that
	define __repr__/__str__ are accepted. Nothing is added to the
	output: no separator and no trailing newline, so bcount always
	equals the number of characters actually written.
	"""
	global bcount
	st = "%s" % s
	bcount = bcount + len(st)
	#write() emits exactly st; the print statement can insert a space
	#between items (and a newline at exit) that bcount would not count,
	#which would shift every offset recorded for the xref table
	sys.stdout.write(st)

#PDF object class
class obj:
	"""One indirect PDF object: a dictionary plus an optional stream.

	Creating an instance registers it in the global objs list, gives it
	the next object id (1-based position in objs) and remembers the
	current value of the global bcount as its offset.
	"""
	def __init__(self):
		#objs is only mutated in place and bcount only read, so the
		#module-level names are used directly
		objs.append(self)
		self.id = len(objs)
		self.offset = bcount
		self.stream = None
		self.attr = []

	def add (self,a):
		"""Append the dictionary entry a (a string such as '/Type/Page')."""
		self.attr.append(a)

	def addstream(self,d):
		"""Set d as the stream data written after the dictionary."""
		self.stream = d

	def __repr__(self):
		"""Return the serialized object as it is written to the file."""
		pieces = ["%d 0 obj<<" % self.id]
		pieces.extend(self.attr)
		pieces.append('>>')
		if self.stream is not None:
			#stream data is framed by the stream/endstream keywords
			pieces.extend(['stream\n', self.stream, '\nendstream'])
		pieces.append('\nendobj\n')
		return ''.join(pieces)

#start writing PDF
bprint ("%PDF-1.2\n")
#comment line made of bytes >= 0x80; presumably the customary marker that
#makes transfer tools treat the file as binary (see PDF reference)
bprint ("%\x90\x84\x86\x8f\n")

#document structure
#object 1: catalog, referring to the page tree as object 2
o_catalog = obj()
o_catalog.add ('/Type/Catalog')
o_catalog.add ('/Pages 2 0 R')
bprint (o_catalog)

#object 2: page tree. The page loop further down creates four objects per
#input file (page, resources, image, contents) starting at id 3, so the
#page objects get ids 3, 7, 11, ...
pobjcounter = 3
pagefield = []
for d in data:
	pagefield.append("%d 0 R" % pobjcounter)
	pobjcounter = pobjcounter + 4
o_pages = obj()
o_pages.add ("/Type/Pages")
o_pages.add ("/Kids[%s]" % ' '.join(pagefield))
o_pages.add ("/Count %d" % len(data))
bprint (o_pages)

#fill in bulk data
#scanner-specific image size in pixels; adapt these two values to your own
#scanner output. WIDTH is used for both /Width and /Columns.
WIDTH = 2552
HEIGHT = 3507
#page content stream: "cm" scales the image to the 595x842 MediaBox and
#"Do" paints the image registered as /I0 in the page resources
CONTENT_STREAM = '595 0 0 842 0 0 cm\n/I0 Do'

#each input file becomes four consecutive objects: page, resources,
#image XObject and contents; the id arithmetic below relies on that order
for d in data:
	o_page = obj()
	o_page.add('/Type/Page')
	o_page.add(("/Contents %d 0 R") % (o_page.id + 3))
	o_page.add(("/Resources %d 0 R") % (o_page.id + 1))
	o_page.add('/MediaBox[0 0 595 842]')
	o_page.add('/Parent 2 0 R')
	bprint(o_page)
	o_resources = obj()
	o_resources.add("/XObject<</I0 %d 0 R>>" % (o_resources.id + 1))
	o_resources.add('/ProcSet[/PDF/ImageB]')
	bprint(o_resources)
	o_xobject = obj()
	o_xobject.add('/Type/XObject')
	o_xobject.add('/Subtype/Image')
	o_xobject.add('/Filter/CCITTFaxDecode')
	o_xobject.add('/Width %d' % WIDTH)
	o_xobject.add('/Height %d' % HEIGHT)
	o_xobject.add('/BitsPerComponent 1')
	o_xobject.add('/Name/I0')
	#/Columns must agree with /Width, so both come from WIDTH
	o_xobject.add('/DecodeParms<</K -1/Columns %d>>' % WIDTH)
	o_xobject.add('/ColorSpace/DeviceGray')
	#the input file is embedded unchanged as the image stream
	o_xobject.add("/Length %d" % len(d))
	o_xobject.addstream(d)
	bprint(o_xobject)
	o_contents = obj()
	#length is computed so it cannot drift from the stream text
	o_contents.add('/Length %d' % len(CONTENT_STREAM))
	o_contents.addstream(CONTENT_STREAM)
	bprint(o_contents)

#final xref table
#remember where the table starts before anything of it is printed;
#this offset is what startxref must report
xrefpos = bcount
#entries in the table: one per object plus the free entry number 0
nentries = len(objs) + 1

tail = ["xref\n", "0 %d\n" % nentries, "0000000000 65535 f \n"]
#one fixed-width entry per object, in id order, holding its byte offset
tail.extend(["%010d 00000 n \n" % o.offset for o in objs])
tail.extend([
	"trailer\n",
	"<</Size %d/Root 1 0 R>>\n" % nentries,
	"startxref\n",
	"%d\n" % xrefpos,
	"%%EOF",
])
for piece in tail:
	bprint (piece)
