(* $Id: utf8.ml,v 1.3 2001/03/01 17:11:13 mjambon Exp $ *)

(* Conversion between two-byte UTF-8 sequences and ISO-8859-1 (Latin-1) *)

(** [one_byte i] is [true] when the byte value [i] is a complete one-byte
    UTF-8 sequence, i.e. when its high bit is clear ([0xxxxxxx]).

    The previous test ([i lsr 7 = 1]) was inverted: it accepted exactly the
    bytes whose high bit is set, which are never one-byte sequences. *)
let one_byte i = i lsr 7 = 0
(** [two_bytes i1 i2] is [true] when [i1] has the form [110xxxxx] (lead
    byte) and [i2] has the form [10xxxxxx] (continuation byte). *)
let two_bytes i1 i2 =
  let lead_tag = i1 lsr 5
  and cont_tag = i2 lsr 6 in
  lead_tag = 0b110 && cont_tag = 0b10

(** [decode_one i] returns [i] unchanged. *)
let decode_one = fun i -> i
(** [decode_two i1 i2] combines [i1] shifted left by six bits with the six
    low bits of [i2], then keeps only the low eight bits of the result.
    Because of that final mask, values that do not fit in one byte wrap
    modulo 256. *)
let decode_two i1 i2 =
  let high = i1 lsl 6
  and low = i2 land 0x3f in
  (high lor low) land 0xff

(** [decode s] returns a copy of [s] in which every pair of adjacent bytes
    accepted by [two_bytes] is replaced by the single byte computed by
    [decode_two].  Every other byte is copied unchanged, including a lead
    byte that is the last byte of [s].

    The result is accumulated in a [Buffer] rather than by mutating a
    string in place, since strings are immutable in current OCaml. *)
let decode s =
  let len = String.length s in
  let out = Buffer.create len in
  let rec loop i =
    if i < len then begin
      let c1 = int_of_char s.[i] in
      let next = i + 1 in
      (* A pair is only considered when a second byte actually exists. *)
      if next < len && two_bytes c1 (int_of_char s.[next]) then begin
        let c2 = int_of_char s.[next] in
        Buffer.add_char out (char_of_int (decode_two c1 c2));
        (* Both bytes of the pair have been consumed. *)
        loop (i + 2)
      end else begin
        Buffer.add_char out (char_of_int c1);
        loop next
      end
    end
  in
  loop 0;
  Buffer.contents out


(** [bin x bits] prints the [bits] low-order bits of [x] on standard
    output, most significant bit first, without a trailing newline.
    Nothing is printed when [bits] is zero or negative. *)
let rec bin x bits =
  (* Guarding with [bits > 0] (instead of matching on exactly 0) keeps a
     negative width from recursing without bound. *)
  if bits > 0 then begin
    (* Emit the higher-order bits first, then the lowest bit of [x]. *)
    bin (x lsr 1) (bits - 1);
    print_int (x land 1)
  end


(* Bit patterns used when building a two-byte sequence. *)
let h1 = 0b11000000     (* tag OR-ed into the first output byte  *)
let h2 = 0b10000000     (* tag OR-ed into the second output byte *)
let mask1 = 0b00011111  (* payload bits kept for the first byte  *)
let mask2 = 0b00111111  (* payload bits kept for the second byte *)

(** [code c] returns a fresh two-character string: the first byte is [h1]
    OR-ed with the bits of [c] above bit 5 (masked by [mask1]), the second
    is [h2] OR-ed with the six low bits of [c] (masked by [mask2]).

    The string is assembled with a [Buffer] rather than by mutating a
    string in place, since strings are immutable in current OCaml.

    NOTE(review): two bytes are produced for every input, including
    characters below 128, for which a two-byte form is overlong in
    standard UTF-8.  Presumably callers only pass characters >= 128 --
    TODO confirm. *)
let code c =
  let n = int_of_char c in
  let lead = h1 lor ((n lsr 6) land mask1)
  and trail = h2 lor (n land mask2) in
  let buf = Buffer.create 2 in
  Buffer.add_char buf (char_of_int lead);
  Buffer.add_char buf (char_of_int trail);
  Buffer.contents buf

(** [add_code buf c] appends two bytes to [buf]: first [h1] OR-ed with the
    bits of [c] above bit 5 (masked by [mask1]), then [h2] OR-ed with the
    six low bits of [c] (masked by [mask2]).

    NOTE(review): two bytes are appended for every input, including
    characters below 128 -- presumably callers handle those separately;
    TODO confirm. *)
let add_code buf c =
  let n = int_of_char c in
  let lead = h1 lor ((n lsr 6) land mask1) in
  let trail = h2 lor (n land mask2) in
  Buffer.add_char buf (char_of_int lead);
  Buffer.add_char buf (char_of_int trail)
