Base64 encoder

Started by pjot, March 01, 2004, 01:54:38 PM

Previous topic - Next topic

HPW

#30
Here now with 1.02 of the DLL going online this evening:



> (import "hpwNLUtility.dll" "hpwMimeEncodeStringNoCRLF")

hpwMimeEncodeStringNoCRLF <ED373C>

> (import "hpwNLUtility.dll" "hpwMimeDecodeString")

hpwMimeDecodeString <ED37A0>

> (time (setq a(BASE64:encode bigtxt)))

2703

> (time(setq b(get-string(hpwMimeEncodeStringNoCRLF bigtxt))))

0

> (length a)

66696

> (length b)

66696
Hans-Peter

Lutz

#31
Actually the lne-feeds should stay in to protect mailservers with max line-length < 76. As long as BASE64:decode filters those out there is no problem.



Lutz

pjot

#32
HPW: All right! That is good news, so it seems we end up with the same result. Thank you for testing!



So I can try to increase the conversion speed then...



Lutz: I am not sure what you mean by placing the (if... length...) stuff outside the loop since that would result in a non-working algorithm...? Can you show a small example of what you mean?



Peter.

Lutz

#33
lets say the data string is 100 chars long than you check a 98 times if the length is > 2 > 1 etc.



If the while loop runs only (while (> (length dat) 2) then the little rest of 1 or two characters can be processes after the loop. And inside the loop you always can assume length > 2.



Lutz

pjot

#34
OK I understand. Tomorrow I will investigate improvements on the Base64 encoder. Thanks.

pjot

#35
The base64 routine is smaller and faster now. But I still think I can perform more optimizations. The base64 value calculation can be unified I suppose. In the meantime I have advanced to this code:



------------------------------------------------------

#!/usr/bin/newlisp

;;

;; Base64 converter using newLISP. Tested on Slackware 9.1 with newLISP 7.5.6.

;;

;; Proxy-servers require a base64 encoded <username:password> to pass through.

;;

;; With this encoder you can hack your way out :-)

;;

;; Improved after hints and tips from Lutz.

;;

;;----------------------------------------------------------------------------



;; Setup base64 encode string

(set 'BASE64 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/")



;; Get input from user

(print "Enter a string to convert: ")

(set 'dat (read-line))



;; Initialize result variable to string

(set 'enc "")



;; Mainloop

(while (> (length dat) 2) (begin

   

   ;; Find ASCII values

   (set 'byte1 (char dat))

   (set 'byte2 (char dat 1))

   (set 'byte3 (char dat 2))



   ;; Now create BASE64 values

   (set 'base1 (/ byte1 4))

   (set 'base2 (+ (* (& byte1 3) 16) (/ (& byte2 240) 16)))

   (set 'base3 (+ (* (& byte2 15) 4) (/ (& byte3 192) 64)))

   (set 'base4 (& byte3 63))



   ;; Find BASE64 characters

   (set 'enc (append enc (nth base1 BASE64) (nth base2 BASE64) (nth base3 BASE64) (nth base4 BASE64) ))

   ;; Decrease 'dat' with 3 characters

   (set 'dat (slice dat 3))

))



;; From here determine last characters and/or padding

(set 'byte1 (char dat))

(set 'byte3 0)

(if (= (length dat) 2) (set 'byte2 (char dat 1)) (set 'byte2 0))



;; Create BASE64 values

(set 'base1 (/ byte1 4))

(set 'base2 (+ (* (& byte1 3) 16) (/ (& byte2 240) 16)))

(set 'base3 (+ (* (& byte2 15) 4) (/ (& byte3 192) 64)))

(set 'base4 (& byte3 63))



;; Find last BASE64 characters

(if (= (length dat) 1) (set 'enc (append enc (nth base1 BASE64) (nth base2 BASE64) "==")))

(if (= (length dat) 2) (set 'enc (append enc (nth base1 BASE64) (nth base2 BASE64) (nth base3 BASE64) "=")))



;; Print resulting string

(println enc)



;; Exit

(exit)

HPW

#36
A little optimized through eliminating multiple set to one.





;;
;; Base64 converter using newLISP. Tested on Slackware 9.1 with newLISP 7.5.4.
;;
;; Proxy-servers require a base64 encoded "username:password" to pass through.
;;
;; With this encoder you can hack your way out :-)
;;
;;----------------------------------------------------------------------------

;; Setup base64 encode string

(context 'base64)


(define (encode dat)

(set 'base64charset "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/")

;; Get input from user
;(print "Enter a string to convert: ")
;(set 'dat (read-line))

;; Initialize result variable to string
(set 'enc "")

;; Mainloop
(while (> (length dat) 2) (begin

;; Find ASCII values
(set 'byte1 (char dat)
'byte2 (char dat 1)
'byte3 (char dat 2)

;; Now create BASE64 values
'base1 (/ byte1 4)
'base2 (+ (* (& byte1 3) 16) (/ (& byte2 240) 16))
'base3 (+ (* (& byte2 15) 4) (/ (& byte3 192) 64))
'base4 (& byte3 63)

;; Find BASE64 characters
'enc (append enc (nth base1 base64charset) (nth base2 base64charset) (nth base3 base64charset) (nth base4 base64charset) )
;; Decrease 'dat' with 3 characters
'dat (slice dat 3)
)))

(if (> (length dat) 0)
(begin
;; From here determine last characters and/or padding
(set 'byte1 (char dat)
'byte3 0)
(if (= (length dat) 2) (set 'byte2 (char dat 1)) (set 'byte2 0))

;; Create BASE64 values
(set 'base1 (/ byte1 4)
'base2 (+ (* (& byte1 3) 16) (/ (& byte2 240) 16))
'base3 (+ (* (& byte2 15) 4) (/ (& byte3 192) 64))
'base4 (& byte3 63))

;; Find last BASE64 characters
(if (= (length dat) 1) (set 'enc (append enc (nth base1 base64charset) (nth base2 base64charset) "==")))
(if (= (length dat) 2) (set 'enc (append enc (nth base1 base64charset) (nth base2 base64charset) (nth base3 base64charset) "=")))
))


;; Return resulting string
enc)





;;
;; Base64 decoder
;;
;; Due to the nature of newLISP, this is the smallest
;; BASE64 decoder I've ever written.
;;
;;-------------------------------------------------------

(define (decode dat)

;; Setup base64 string
(set 'base64charset "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/")

;; Get input from user
;(print "Enter a string to convert: ")
;(set 'dat (read-line))

;; Initialize result variable to string
(set 'res "")

;; Mainloop
(while (> (length dat) 0)
(begin
;; Find the indexnumber in the base64charset definition
(set 'byte1 (find (nth 0 dat) base64charset))
(if (= byte1 nil)(set 'byte1 0))
(set 'byte2 (find (nth 1 dat) base64charset))
(if (= byte2 nil)(set 'byte2 0))
(set 'byte3 (find (nth 2 dat) base64charset))
(if (= byte3 nil)(set 'byte3 0))
(set 'byte4 (find (nth 3 dat) base64charset))
(if (= byte4 nil)(set 'byte4 0))

;; Recalculate to ASCII value
(set 'res (append res
(char (+ (* (& byte1 63) 4) (/ (& byte2 48) 16)))
(char (+ (* (& byte2 15) 16) (/ (& byte3 60) 4)))
(char (+ (* (& byte3 3) 64) byte4))))

;; Decrease string with 4
(set 'dat (slice dat 4))))

;; Print resulting string
(trim res "00")
)

(context 'MAIN)

Hans-Peter

Lutz

#37
another optimization, on bigger files repeated string appending gets very expensive because the string is growing and growing. The following change pushes all strings on a list than does a 'join'. On files smaller than a few Kbyte this is slightly slower but on a 100Kbyte file speed increases 10 fold! Ecoding newlisp.c in 4 seconds instead of 40seconds. On bigger files the difference will be even more.



I have used this technique of string appending in many programs with sometimes dramatic performance differences.





;;
;; Base64 converter using newLISP. Tested on Slackware 9.1 with newLISP 7.5.4.
;;
;; Proxy-servers require a base64 encoded "username:password" to pass through.
;;
;; With this encoder you can hack your way out :-)
;;
;;----------------------------------------------------------------------------

;; Setup base64 encode string

(context 'base64)


(define (encode dat)

(set 'base64charset "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/")

(define (to64)
  (set 'base1 (/ byte1 4)
       'base2 (+ (* (& byte1 3) 16) (/ (& byte2 240) 16))
       'base3 (+ (* (& byte2 15) 4) (/ (& byte3 192) 64))
       'base4 (& byte3 63)))

;; Initialize result variable to string
(set 'enc '())

;; Mainloop
(while (> (length dat) 2) (begin

   ;; Find ASCII values
   (set 'byte1 (char dat)
        'byte2 (char dat 1)
        'byte3 (char dat 2))
   
   ;; Now create BASE64 values
   (to64)
 
   ;; Find BASE64 characters
   (push (append (nth base1 base64charset) (nth base2 base64charset)
                 (nth base3 base64charset) (nth base4 base64charset)) enc)

   ;; Decrease 'dat' with 3 characters
   (set 'dat (slice dat 3))
   ))

(if (> (length dat) 0)
(begin

;; From here determine last characters and/or padding
(set   'byte1 (char dat)
   'byte3 0)
(if (= (length dat) 2) (set 'byte2 (char dat 1)) (set 'byte2 0))

;; Create BASE64 values
(to64)

;; Find last BASE64 characters
(if (= (length dat) 1) (push (append (nth base1 base64charset) (nth base2 base64charset) "==") enc))
(if (= (length dat) 2) (push (append (nth base1 base64charset) (nth base2 base64charset)
                             (nth base3 base64charset) "=") enc))
))


;; Return resulting string
(trim (join (reverse enc))))


Lutz

Lutz

#38
oops, should have moved the define of 'to64' out of the (define (encode ....) but it doesn't really matter.



Lutz

pjot

#39
Phew newLISP indeed is very powerfull! I am learning all the time here. Thank you HPW and Lutz for your examples and explanations!



Peter.