array(UC_LINE_BREAK_OP => LB_PROHIBITED,
UC_LINE_BREAK_CL => LB_PROHIBITED,
UC_LINE_BREAK_QU => LB_PROHIBITED,
UC_LINE_BREAK_GL => LB_PROHIBITED,
UC_LINE_BREAK_NS => LB_PROHIBITED,
UC_LINE_BREAK_EX => LB_PROHIBITED,
UC_LINE_BREAK_SY => LB_PROHIBITED,
UC_LINE_BREAK_IS => LB_PROHIBITED,
UC_LINE_BREAK_PR => LB_PROHIBITED,
UC_LINE_BREAK_PO => LB_PROHIBITED,
UC_LINE_BREAK_NU => LB_PROHIBITED,
UC_LINE_BREAK_AL => LB_PROHIBITED,
UC_LINE_BREAK_ID => LB_PROHIBITED,
UC_LINE_BREAK_IN => LB_PROHIBITED,
UC_LINE_BREAK_HY => LB_PROHIBITED,
UC_LINE_BREAK_BA => LB_PROHIBITED,
UC_LINE_BREAK_BB => LB_PROHIBITED,
UC_LINE_BREAK_B2 => LB_PROHIBITED,
UC_LINE_BREAK_ZW => LB_PROHIBITED,
UC_LINE_BREAK_CM => LB_PROHIBITED_CM,
UC_LINE_BREAK_WJ => LB_PROHIBITED,
UC_LINE_BREAK_H2 => LB_PROHIBITED,
UC_LINE_BREAK_H3 => LB_PROHIBITED,
UC_LINE_BREAK_JL => LB_PROHIBITED,
UC_LINE_BREAK_JV => LB_PROHIBITED,
UC_LINE_BREAK_JT => LB_PROHIBITED),
UC_LINE_BREAK_CL => array(UC_LINE_BREAK_OP => LB_DIRECT,
UC_LINE_BREAK_CL => LB_PROHIBITED,
UC_LINE_BREAK_QU => LB_INDIRECT,
UC_LINE_BREAK_GL => LB_INDIRECT,
UC_LINE_BREAK_NS => LB_PROHIBITED,
UC_LINE_BREAK_EX => LB_PROHIBITED,
UC_LINE_BREAK_SY => LB_PROHIBITED,
UC_LINE_BREAK_IS => LB_PROHIBITED,
UC_LINE_BREAK_PR => LB_INDIRECT,
UC_LINE_BREAK_PO => LB_INDIRECT,
UC_LINE_BREAK_NU => LB_INDIRECT,
UC_LINE_BREAK_AL => LB_INDIRECT,
UC_LINE_BREAK_ID => LB_DIRECT,
UC_LINE_BREAK_IN => LB_DIRECT,
UC_LINE_BREAK_HY => LB_INDIRECT,
UC_LINE_BREAK_BA => LB_INDIRECT,
UC_LINE_BREAK_BB => LB_DIRECT,
UC_LINE_BREAK_B2 => LB_DIRECT,
UC_LINE_BREAK_ZW => LB_PROHIBITED,
UC_LINE_BREAK_CM => LB_INDIRECT_CM,
UC_LINE_BREAK_WJ => LB_PROHIBITED,
UC_LINE_BREAK_H2 => LB_DIRECT,
UC_LINE_BREAK_H3 => LB_DIRECT,
UC_LINE_BREAK_JL => LB_DIRECT,
UC_LINE_BREAK_JV => LB_DIRECT,
UC_LINE_BREAK_JT => LB_DIRECT),
UC_LINE_BREAK_QU => array(UC_LINE_BREAK_OP => LB_PROHIBITED,
UC_LINE_BREAK_CL => LB_PROHIBITED,
UC_LINE_BREAK_QU => LB_INDIRECT,
UC_LINE_BREAK_GL => LB_INDIRECT,
UC_LINE_BREAK_NS => LB_INDIRECT,
UC_LINE_BREAK_EX => LB_PROHIBITED,
UC_LINE_BREAK_SY => LB_PROHIBITED,
UC_LINE_BREAK_IS => LB_PROHIBITED,
UC_LINE_BREAK_PR => LB_INDIRECT,
UC_LINE_BREAK_PO => LB_INDIRECT,
UC_LINE_BREAK_NU => LB_INDIRECT,
UC_LINE_BREAK_AL => LB_INDIRECT,
UC_LINE_BREAK_ID => LB_INDIRECT,
UC_LINE_BREAK_IN => LB_INDIRECT,
UC_LINE_BREAK_HY => LB_INDIRECT,
UC_LINE_BREAK_BA => LB_INDIRECT,
UC_LINE_BREAK_BB => LB_INDIRECT,
UC_LINE_BREAK_B2 => LB_INDIRECT,
UC_LINE_BREAK_ZW => LB_PROHIBITED,
UC_LINE_BREAK_CM => LB_INDIRECT_CM,
UC_LINE_BREAK_WJ => LB_PROHIBITED,
UC_LINE_BREAK_H2 => LB_INDIRECT,
UC_LINE_BREAK_H3 => LB_INDIRECT,
UC_LINE_BREAK_JL => LB_INDIRECT,
UC_LINE_BREAK_JV => LB_INDIRECT,
UC_LINE_BREAK_JT => LB_INDIRECT),
UC_LINE_BREAK_GL => array(UC_LINE_BREAK_OP => LB_INDIRECT,
UC_LINE_BREAK_CL => LB_PROHIBITED,
UC_LINE_BREAK_QU => LB_INDIRECT,
UC_LINE_BREAK_GL => LB_INDIRECT,
UC_LINE_BREAK_NS => LB_INDIRECT,
UC_LINE_BREAK_EX => LB_PROHIBITED,
UC_LINE_BREAK_SY => LB_PROHIBITED,
UC_LINE_BREAK_IS => LB_PROHIBITED,
UC_LINE_BREAK_PR => LB_INDIRECT,
UC_LINE_BREAK_PO => LB_INDIRECT,
UC_LINE_BREAK_NU => LB_INDIRECT,
UC_LINE_BREAK_AL => LB_INDIRECT,
UC_LINE_BREAK_ID => LB_INDIRECT,
UC_LINE_BREAK_IN => LB_INDIRECT,
UC_LINE_BREAK_HY => LB_INDIRECT,
UC_LINE_BREAK_BA => LB_INDIRECT,
UC_LINE_BREAK_BB => LB_INDIRECT,
UC_LINE_BREAK_B2 => LB_INDIRECT,
UC_LINE_BREAK_ZW => LB_PROHIBITED,
UC_LINE_BREAK_CM => LB_INDIRECT_CM,
UC_LINE_BREAK_WJ => LB_PROHIBITED,
UC_LINE_BREAK_H2 => LB_INDIRECT,
UC_LINE_BREAK_H3 => LB_INDIRECT,
UC_LINE_BREAK_JL => LB_INDIRECT,
UC_LINE_BREAK_JV => LB_INDIRECT,
UC_LINE_BREAK_JT => LB_INDIRECT),
UC_LINE_BREAK_NS => array(UC_LINE_BREAK_OP => LB_DIRECT,
UC_LINE_BREAK_CL => LB_PROHIBITED,
UC_LINE_BREAK_QU => LB_INDIRECT,
UC_LINE_BREAK_GL => LB_INDIRECT,
UC_LINE_BREAK_NS => LB_INDIRECT,
UC_LINE_BREAK_EX => LB_PROHIBITED,
UC_LINE_BREAK_SY => LB_PROHIBITED,
UC_LINE_BREAK_IS => LB_PROHIBITED,
UC_LINE_BREAK_PR => LB_DIRECT,
UC_LINE_BREAK_PO => LB_DIRECT,
UC_LINE_BREAK_NU => LB_DIRECT,
UC_LINE_BREAK_AL => LB_DIRECT,
UC_LINE_BREAK_ID => LB_DIRECT,
UC_LINE_BREAK_IN => LB_DIRECT,
UC_LINE_BREAK_HY => LB_INDIRECT,
UC_LINE_BREAK_BA => LB_INDIRECT,
UC_LINE_BREAK_BB => LB_DIRECT,
UC_LINE_BREAK_B2 => LB_DIRECT,
UC_LINE_BREAK_ZW => LB_PROHIBITED,
UC_LINE_BREAK_CM => LB_INDIRECT_CM,
UC_LINE_BREAK_WJ => LB_PROHIBITED,
UC_LINE_BREAK_H2 => LB_DIRECT,
UC_LINE_BREAK_H3 => LB_DIRECT,
UC_LINE_BREAK_JL => LB_DIRECT,
UC_LINE_BREAK_JV => LB_DIRECT,
UC_LINE_BREAK_JT => LB_DIRECT),
UC_LINE_BREAK_EX => array(UC_LINE_BREAK_OP => LB_DIRECT,
UC_LINE_BREAK_CL => LB_PROHIBITED,
UC_LINE_BREAK_QU => LB_INDIRECT,
UC_LINE_BREAK_GL => LB_INDIRECT,
UC_LINE_BREAK_NS => LB_INDIRECT,
UC_LINE_BREAK_EX => LB_PROHIBITED,
UC_LINE_BREAK_SY => LB_PROHIBITED,
UC_LINE_BREAK_IS => LB_PROHIBITED,
UC_LINE_BREAK_PR => LB_DIRECT,
UC_LINE_BREAK_PO => LB_DIRECT,
UC_LINE_BREAK_NU => LB_DIRECT,
UC_LINE_BREAK_AL => LB_DIRECT,
UC_LINE_BREAK_ID => LB_DIRECT,
UC_LINE_BREAK_IN => LB_DIRECT,
UC_LINE_BREAK_HY => LB_INDIRECT,
UC_LINE_BREAK_BA => LB_INDIRECT,
UC_LINE_BREAK_BB => LB_DIRECT,
UC_LINE_BREAK_B2 => LB_DIRECT,
UC_LINE_BREAK_ZW => LB_PROHIBITED,
UC_LINE_BREAK_CM => LB_INDIRECT_CM,
UC_LINE_BREAK_WJ => LB_PROHIBITED,
UC_LINE_BREAK_H2 => LB_DIRECT,
UC_LINE_BREAK_H3 => LB_DIRECT,
UC_LINE_BREAK_JL => LB_DIRECT,
UC_LINE_BREAK_JV => LB_DIRECT,
UC_LINE_BREAK_JT => LB_DIRECT),
UC_LINE_BREAK_SY => array(UC_LINE_BREAK_OP => LB_DIRECT,
UC_LINE_BREAK_CL => LB_PROHIBITED,
UC_LINE_BREAK_QU => LB_INDIRECT,
UC_LINE_BREAK_GL => LB_INDIRECT,
UC_LINE_BREAK_NS => LB_INDIRECT,
UC_LINE_BREAK_EX => LB_PROHIBITED,
UC_LINE_BREAK_SY => LB_PROHIBITED,
UC_LINE_BREAK_IS => LB_PROHIBITED,
UC_LINE_BREAK_PR => LB_DIRECT,
UC_LINE_BREAK_PO => LB_DIRECT,
UC_LINE_BREAK_NU => LB_INDIRECT,
UC_LINE_BREAK_AL => LB_DIRECT,
UC_LINE_BREAK_ID => LB_DIRECT,
UC_LINE_BREAK_IN => LB_DIRECT,
UC_LINE_BREAK_HY => LB_INDIRECT,
UC_LINE_BREAK_BA => LB_INDIRECT,
UC_LINE_BREAK_BB => LB_DIRECT,
UC_LINE_BREAK_B2 => LB_DIRECT,
UC_LINE_BREAK_ZW => LB_PROHIBITED,
UC_LINE_BREAK_CM => LB_INDIRECT_CM,
UC_LINE_BREAK_WJ => LB_PROHIBITED,
UC_LINE_BREAK_H2 => LB_DIRECT,
UC_LINE_BREAK_H3 => LB_DIRECT,
UC_LINE_BREAK_JL => LB_DIRECT,
UC_LINE_BREAK_JV => LB_DIRECT,
UC_LINE_BREAK_JT => LB_DIRECT),
UC_LINE_BREAK_IS => array(UC_LINE_BREAK_OP => LB_DIRECT,
UC_LINE_BREAK_CL => LB_PROHIBITED,
UC_LINE_BREAK_QU => LB_INDIRECT,
UC_LINE_BREAK_GL => LB_INDIRECT,
UC_LINE_BREAK_NS => LB_INDIRECT,
UC_LINE_BREAK_EX => LB_PROHIBITED,
UC_LINE_BREAK_SY => LB_PROHIBITED,
UC_LINE_BREAK_IS => LB_PROHIBITED,
UC_LINE_BREAK_PR => LB_DIRECT,
UC_LINE_BREAK_PO => LB_DIRECT,
UC_LINE_BREAK_NU => LB_INDIRECT,
UC_LINE_BREAK_AL => LB_INDIRECT,
UC_LINE_BREAK_ID => LB_DIRECT,
UC_LINE_BREAK_IN => LB_DIRECT,
UC_LINE_BREAK_HY => LB_INDIRECT,
UC_LINE_BREAK_BA => LB_INDIRECT,
UC_LINE_BREAK_BB => LB_DIRECT,
UC_LINE_BREAK_B2 => LB_DIRECT,
UC_LINE_BREAK_ZW => LB_PROHIBITED,
UC_LINE_BREAK_CM => LB_INDIRECT_CM,
UC_LINE_BREAK_WJ => LB_PROHIBITED,
UC_LINE_BREAK_H2 => LB_DIRECT,
UC_LINE_BREAK_H3 => LB_DIRECT,
UC_LINE_BREAK_JL => LB_DIRECT,
UC_LINE_BREAK_JV => LB_DIRECT,
UC_LINE_BREAK_JT => LB_DIRECT),
UC_LINE_BREAK_PR => array(UC_LINE_BREAK_OP => LB_INDIRECT,
UC_LINE_BREAK_CL => LB_PROHIBITED,
UC_LINE_BREAK_QU => LB_INDIRECT,
UC_LINE_BREAK_GL => LB_INDIRECT,
UC_LINE_BREAK_NS => LB_INDIRECT,
UC_LINE_BREAK_EX => LB_PROHIBITED,
UC_LINE_BREAK_SY => LB_PROHIBITED,
UC_LINE_BREAK_IS => LB_PROHIBITED,
UC_LINE_BREAK_PR => LB_DIRECT,
UC_LINE_BREAK_PO => LB_DIRECT,
UC_LINE_BREAK_NU => LB_INDIRECT,
UC_LINE_BREAK_AL => LB_INDIRECT,
UC_LINE_BREAK_ID => LB_INDIRECT,
UC_LINE_BREAK_IN => LB_DIRECT,
UC_LINE_BREAK_HY => LB_INDIRECT,
UC_LINE_BREAK_BA => LB_INDIRECT,
UC_LINE_BREAK_BB => LB_DIRECT,
UC_LINE_BREAK_B2 => LB_DIRECT,
UC_LINE_BREAK_ZW => LB_PROHIBITED,
UC_LINE_BREAK_CM => LB_INDIRECT_CM,
UC_LINE_BREAK_WJ => LB_PROHIBITED,
UC_LINE_BREAK_H2 => LB_INDIRECT,
UC_LINE_BREAK_H3 => LB_INDIRECT,
UC_LINE_BREAK_JL => LB_INDIRECT,
UC_LINE_BREAK_JV => LB_INDIRECT,
UC_LINE_BREAK_JT => LB_INDIRECT),
UC_LINE_BREAK_PO => array(UC_LINE_BREAK_OP => LB_INDIRECT,
UC_LINE_BREAK_CL => LB_PROHIBITED,
UC_LINE_BREAK_QU => LB_INDIRECT,
UC_LINE_BREAK_GL => LB_INDIRECT,
UC_LINE_BREAK_NS => LB_INDIRECT,
UC_LINE_BREAK_EX => LB_PROHIBITED,
UC_LINE_BREAK_SY => LB_PROHIBITED,
UC_LINE_BREAK_IS => LB_PROHIBITED,
UC_LINE_BREAK_PR => LB_DIRECT,
UC_LINE_BREAK_PO => LB_DIRECT,
UC_LINE_BREAK_NU => LB_INDIRECT,
UC_LINE_BREAK_AL => LB_INDIRECT,
UC_LINE_BREAK_ID => LB_DIRECT,
UC_LINE_BREAK_IN => LB_DIRECT,
UC_LINE_BREAK_HY => LB_INDIRECT,
UC_LINE_BREAK_BA => LB_INDIRECT,
UC_LINE_BREAK_BB => LB_DIRECT,
UC_LINE_BREAK_B2 => LB_DIRECT,
UC_LINE_BREAK_ZW => LB_PROHIBITED,
UC_LINE_BREAK_CM => LB_INDIRECT_CM,
UC_LINE_BREAK_WJ => LB_PROHIBITED,
UC_LINE_BREAK_H2 => LB_DIRECT,
UC_LINE_BREAK_H3 => LB_DIRECT,
UC_LINE_BREAK_JL => LB_DIRECT,
UC_LINE_BREAK_JV => LB_DIRECT,
UC_LINE_BREAK_JT => LB_DIRECT),
UC_LINE_BREAK_NU => array(UC_LINE_BREAK_OP => LB_INDIRECT,
UC_LINE_BREAK_CL => LB_PROHIBITED,
UC_LINE_BREAK_QU => LB_INDIRECT,
UC_LINE_BREAK_GL => LB_INDIRECT,
UC_LINE_BREAK_NS => LB_INDIRECT,
UC_LINE_BREAK_EX => LB_PROHIBITED,
UC_LINE_BREAK_SY => LB_PROHIBITED,
UC_LINE_BREAK_IS => LB_PROHIBITED,
UC_LINE_BREAK_PR => LB_INDIRECT,
UC_LINE_BREAK_PO => LB_INDIRECT,
UC_LINE_BREAK_NU => LB_INDIRECT,
UC_LINE_BREAK_AL => LB_INDIRECT,
UC_LINE_BREAK_ID => LB_DIRECT,
UC_LINE_BREAK_IN => LB_INDIRECT,
UC_LINE_BREAK_HY => LB_INDIRECT,
UC_LINE_BREAK_BA => LB_INDIRECT,
UC_LINE_BREAK_BB => LB_DIRECT,
UC_LINE_BREAK_B2 => LB_DIRECT,
UC_LINE_BREAK_ZW => LB_PROHIBITED,
UC_LINE_BREAK_CM => LB_INDIRECT_CM,
UC_LINE_BREAK_WJ => LB_PROHIBITED,
UC_LINE_BREAK_H2 => LB_DIRECT,
UC_LINE_BREAK_H3 => LB_DIRECT,
UC_LINE_BREAK_JL => LB_DIRECT,
UC_LINE_BREAK_JV => LB_DIRECT,
UC_LINE_BREAK_JT => LB_DIRECT),
UC_LINE_BREAK_AL => array(UC_LINE_BREAK_OP => LB_INDIRECT,
UC_LINE_BREAK_CL => LB_PROHIBITED,
UC_LINE_BREAK_QU => LB_INDIRECT,
UC_LINE_BREAK_GL => LB_INDIRECT,
UC_LINE_BREAK_NS => LB_INDIRECT,
UC_LINE_BREAK_EX => LB_PROHIBITED,
UC_LINE_BREAK_SY => LB_PROHIBITED,
UC_LINE_BREAK_IS => LB_PROHIBITED,
UC_LINE_BREAK_PR => LB_DIRECT,
UC_LINE_BREAK_PO => LB_DIRECT,
UC_LINE_BREAK_NU => LB_INDIRECT,
UC_LINE_BREAK_AL => LB_INDIRECT,
UC_LINE_BREAK_ID => LB_DIRECT,
UC_LINE_BREAK_IN => LB_INDIRECT,
UC_LINE_BREAK_HY => LB_INDIRECT,
UC_LINE_BREAK_BA => LB_INDIRECT,
UC_LINE_BREAK_BB => LB_DIRECT,
UC_LINE_BREAK_B2 => LB_DIRECT,
UC_LINE_BREAK_ZW => LB_PROHIBITED,
UC_LINE_BREAK_CM => LB_INDIRECT_CM,
UC_LINE_BREAK_WJ => LB_PROHIBITED,
UC_LINE_BREAK_H2 => LB_DIRECT,
UC_LINE_BREAK_H3 => LB_DIRECT,
UC_LINE_BREAK_JL => LB_DIRECT,
UC_LINE_BREAK_JV => LB_DIRECT,
UC_LINE_BREAK_JT => LB_DIRECT),
UC_LINE_BREAK_ID => array(UC_LINE_BREAK_OP => LB_DIRECT,
UC_LINE_BREAK_CL => LB_PROHIBITED,
UC_LINE_BREAK_QU => LB_INDIRECT,
UC_LINE_BREAK_GL => LB_INDIRECT,
UC_LINE_BREAK_NS => LB_INDIRECT,
UC_LINE_BREAK_EX => LB_PROHIBITED,
UC_LINE_BREAK_SY => LB_PROHIBITED,
UC_LINE_BREAK_IS => LB_PROHIBITED,
UC_LINE_BREAK_PR => LB_DIRECT,
UC_LINE_BREAK_PO => LB_INDIRECT,
UC_LINE_BREAK_NU => LB_DIRECT,
UC_LINE_BREAK_AL => LB_DIRECT,
UC_LINE_BREAK_ID => LB_DIRECT,
UC_LINE_BREAK_IN => LB_INDIRECT,
UC_LINE_BREAK_HY => LB_INDIRECT,
UC_LINE_BREAK_BA => LB_INDIRECT,
UC_LINE_BREAK_BB => LB_DIRECT,
UC_LINE_BREAK_B2 => LB_DIRECT,
UC_LINE_BREAK_ZW => LB_PROHIBITED,
UC_LINE_BREAK_CM => LB_INDIRECT_CM,
UC_LINE_BREAK_WJ => LB_PROHIBITED,
UC_LINE_BREAK_H2 => LB_DIRECT,
UC_LINE_BREAK_H3 => LB_DIRECT,
UC_LINE_BREAK_JL => LB_DIRECT,
UC_LINE_BREAK_JV => LB_DIRECT,
UC_LINE_BREAK_JT => LB_DIRECT),
UC_LINE_BREAK_IN => array(UC_LINE_BREAK_OP => LB_DIRECT,
UC_LINE_BREAK_CL => LB_PROHIBITED,
UC_LINE_BREAK_QU => LB_INDIRECT,
UC_LINE_BREAK_GL => LB_INDIRECT,
UC_LINE_BREAK_NS => LB_INDIRECT,
UC_LINE_BREAK_EX => LB_PROHIBITED,
UC_LINE_BREAK_SY => LB_PROHIBITED,
UC_LINE_BREAK_IS => LB_PROHIBITED,
UC_LINE_BREAK_PR => LB_DIRECT,
UC_LINE_BREAK_PO => LB_DIRECT,
UC_LINE_BREAK_NU => LB_DIRECT,
UC_LINE_BREAK_AL => LB_DIRECT,
UC_LINE_BREAK_ID => LB_DIRECT,
UC_LINE_BREAK_IN => LB_INDIRECT,
UC_LINE_BREAK_HY => LB_INDIRECT,
UC_LINE_BREAK_BA => LB_INDIRECT,
UC_LINE_BREAK_BB => LB_DIRECT,
UC_LINE_BREAK_B2 => LB_DIRECT,
UC_LINE_BREAK_ZW => LB_PROHIBITED,
UC_LINE_BREAK_CM => LB_INDIRECT_CM,
UC_LINE_BREAK_WJ => LB_PROHIBITED,
UC_LINE_BREAK_H2 => LB_DIRECT,
UC_LINE_BREAK_H3 => LB_DIRECT,
UC_LINE_BREAK_JL => LB_DIRECT,
UC_LINE_BREAK_JV => LB_DIRECT,
UC_LINE_BREAK_JT => LB_DIRECT),
UC_LINE_BREAK_HY => array(UC_LINE_BREAK_OP => LB_DIRECT,
UC_LINE_BREAK_CL => LB_PROHIBITED,
UC_LINE_BREAK_QU => LB_INDIRECT,
UC_LINE_BREAK_GL => LB_INDIRECT,
UC_LINE_BREAK_NS => LB_INDIRECT,
UC_LINE_BREAK_EX => LB_PROHIBITED,
UC_LINE_BREAK_SY => LB_PROHIBITED,
UC_LINE_BREAK_IS => LB_PROHIBITED,
UC_LINE_BREAK_PR => LB_DIRECT,
UC_LINE_BREAK_PO => LB_DIRECT,
UC_LINE_BREAK_NU => LB_INDIRECT,
UC_LINE_BREAK_AL => LB_DIRECT,
UC_LINE_BREAK_ID => LB_DIRECT,
UC_LINE_BREAK_IN => LB_DIRECT,
UC_LINE_BREAK_HY => LB_INDIRECT,
UC_LINE_BREAK_BA => LB_INDIRECT,
UC_LINE_BREAK_BB => LB_DIRECT,
UC_LINE_BREAK_B2 => LB_DIRECT,
UC_LINE_BREAK_ZW => LB_PROHIBITED,
UC_LINE_BREAK_CM => LB_INDIRECT_CM,
UC_LINE_BREAK_WJ => LB_PROHIBITED,
UC_LINE_BREAK_H2 => LB_DIRECT,
UC_LINE_BREAK_H3 => LB_DIRECT,
UC_LINE_BREAK_JL => LB_DIRECT,
UC_LINE_BREAK_JV => LB_DIRECT,
UC_LINE_BREAK_JT => LB_DIRECT),
UC_LINE_BREAK_BA => array(UC_LINE_BREAK_OP => LB_DIRECT,
UC_LINE_BREAK_CL => LB_PROHIBITED,
UC_LINE_BREAK_QU => LB_INDIRECT,
UC_LINE_BREAK_GL => LB_INDIRECT,
UC_LINE_BREAK_NS => LB_INDIRECT,
UC_LINE_BREAK_EX => LB_PROHIBITED,
UC_LINE_BREAK_SY => LB_PROHIBITED,
UC_LINE_BREAK_IS => LB_PROHIBITED,
UC_LINE_BREAK_PR => LB_DIRECT,
UC_LINE_BREAK_PO => LB_DIRECT,
UC_LINE_BREAK_NU => LB_DIRECT,
UC_LINE_BREAK_AL => LB_DIRECT,
UC_LINE_BREAK_ID => LB_DIRECT,
UC_LINE_BREAK_IN => LB_DIRECT,
UC_LINE_BREAK_HY => LB_INDIRECT,
UC_LINE_BREAK_BA => LB_INDIRECT,
UC_LINE_BREAK_BB => LB_DIRECT,
UC_LINE_BREAK_B2 => LB_DIRECT,
UC_LINE_BREAK_ZW => LB_PROHIBITED,
UC_LINE_BREAK_CM => LB_INDIRECT_CM,
UC_LINE_BREAK_WJ => LB_PROHIBITED,
UC_LINE_BREAK_H2 => LB_DIRECT,
UC_LINE_BREAK_H3 => LB_DIRECT,
UC_LINE_BREAK_JL => LB_DIRECT,
UC_LINE_BREAK_JV => LB_DIRECT,
UC_LINE_BREAK_JT => LB_DIRECT),
UC_LINE_BREAK_BB => array(UC_LINE_BREAK_OP => LB_INDIRECT,
UC_LINE_BREAK_CL => LB_PROHIBITED,
UC_LINE_BREAK_QU => LB_INDIRECT,
UC_LINE_BREAK_GL => LB_INDIRECT,
UC_LINE_BREAK_NS => LB_INDIRECT,
UC_LINE_BREAK_EX => LB_PROHIBITED,
UC_LINE_BREAK_SY => LB_PROHIBITED,
UC_LINE_BREAK_IS => LB_PROHIBITED,
UC_LINE_BREAK_PR => LB_INDIRECT,
UC_LINE_BREAK_PO => LB_INDIRECT,
UC_LINE_BREAK_NU => LB_INDIRECT,
UC_LINE_BREAK_AL => LB_INDIRECT,
UC_LINE_BREAK_ID => LB_INDIRECT,
UC_LINE_BREAK_IN => LB_INDIRECT,
UC_LINE_BREAK_HY => LB_INDIRECT,
UC_LINE_BREAK_BA => LB_INDIRECT,
UC_LINE_BREAK_BB => LB_INDIRECT,
UC_LINE_BREAK_B2 => LB_INDIRECT,
UC_LINE_BREAK_ZW => LB_PROHIBITED,
UC_LINE_BREAK_CM => LB_INDIRECT_CM,
UC_LINE_BREAK_WJ => LB_PROHIBITED,
UC_LINE_BREAK_H2 => LB_INDIRECT,
UC_LINE_BREAK_H3 => LB_INDIRECT,
UC_LINE_BREAK_JL => LB_INDIRECT,
UC_LINE_BREAK_JV => LB_INDIRECT,
UC_LINE_BREAK_JT => LB_INDIRECT),
UC_LINE_BREAK_B2 => array(UC_LINE_BREAK_OP => LB_DIRECT,
UC_LINE_BREAK_CL => LB_PROHIBITED,
UC_LINE_BREAK_QU => LB_INDIRECT,
UC_LINE_BREAK_GL => LB_INDIRECT,
UC_LINE_BREAK_NS => LB_INDIRECT,
UC_LINE_BREAK_EX => LB_PROHIBITED,
UC_LINE_BREAK_SY => LB_PROHIBITED,
UC_LINE_BREAK_IS => LB_PROHIBITED,
UC_LINE_BREAK_PR => LB_DIRECT,
UC_LINE_BREAK_PO => LB_DIRECT,
UC_LINE_BREAK_NU => LB_DIRECT,
UC_LINE_BREAK_AL => LB_DIRECT,
UC_LINE_BREAK_ID => LB_DIRECT,
UC_LINE_BREAK_IN => LB_DIRECT,
UC_LINE_BREAK_HY => LB_INDIRECT,
UC_LINE_BREAK_BA => LB_INDIRECT,
UC_LINE_BREAK_BB => LB_DIRECT,
UC_LINE_BREAK_B2 => LB_PROHIBITED,
UC_LINE_BREAK_ZW => LB_PROHIBITED,
UC_LINE_BREAK_CM => LB_INDIRECT_CM,
UC_LINE_BREAK_WJ => LB_PROHIBITED,
UC_LINE_BREAK_H2 => LB_DIRECT,
UC_LINE_BREAK_H3 => LB_DIRECT,
UC_LINE_BREAK_JL => LB_DIRECT,
UC_LINE_BREAK_JV => LB_DIRECT,
UC_LINE_BREAK_JT => LB_DIRECT),
UC_LINE_BREAK_ZW => array(UC_LINE_BREAK_OP => LB_DIRECT,
UC_LINE_BREAK_CL => LB_DIRECT,
UC_LINE_BREAK_QU => LB_DIRECT,
UC_LINE_BREAK_GL => LB_DIRECT,
UC_LINE_BREAK_NS => LB_DIRECT,
UC_LINE_BREAK_EX => LB_DIRECT,
UC_LINE_BREAK_SY => LB_DIRECT,
UC_LINE_BREAK_IS => LB_DIRECT,
UC_LINE_BREAK_PR => LB_DIRECT,
UC_LINE_BREAK_PO => LB_DIRECT,
UC_LINE_BREAK_NU => LB_DIRECT,
UC_LINE_BREAK_AL => LB_DIRECT,
UC_LINE_BREAK_ID => LB_DIRECT,
UC_LINE_BREAK_IN => LB_DIRECT,
UC_LINE_BREAK_HY => LB_DIRECT,
UC_LINE_BREAK_BA => LB_DIRECT,
UC_LINE_BREAK_BB => LB_DIRECT,
UC_LINE_BREAK_B2 => LB_DIRECT,
UC_LINE_BREAK_ZW => LB_PROHIBITED,
UC_LINE_BREAK_CM => LB_DIRECT,
UC_LINE_BREAK_WJ => LB_DIRECT,
UC_LINE_BREAK_H2 => LB_DIRECT,
UC_LINE_BREAK_H3 => LB_DIRECT,
UC_LINE_BREAK_JL => LB_DIRECT,
UC_LINE_BREAK_JV => LB_DIRECT,
UC_LINE_BREAK_JT => LB_DIRECT),
UC_LINE_BREAK_CM => array(UC_LINE_BREAK_OP => LB_DIRECT,
UC_LINE_BREAK_CL => LB_PROHIBITED,
UC_LINE_BREAK_QU => LB_INDIRECT,
UC_LINE_BREAK_GL => LB_INDIRECT,
UC_LINE_BREAK_NS => LB_INDIRECT,
UC_LINE_BREAK_EX => LB_PROHIBITED,
UC_LINE_BREAK_SY => LB_PROHIBITED,
UC_LINE_BREAK_IS => LB_PROHIBITED,
UC_LINE_BREAK_PR => LB_DIRECT,
UC_LINE_BREAK_PO => LB_DIRECT,
UC_LINE_BREAK_NU => LB_INDIRECT,
UC_LINE_BREAK_AL => LB_INDIRECT,
UC_LINE_BREAK_ID => LB_DIRECT,
UC_LINE_BREAK_IN => LB_INDIRECT,
UC_LINE_BREAK_HY => LB_INDIRECT,
UC_LINE_BREAK_BA => LB_INDIRECT,
UC_LINE_BREAK_BB => LB_DIRECT,
UC_LINE_BREAK_B2 => LB_DIRECT,
UC_LINE_BREAK_ZW => LB_PROHIBITED,
UC_LINE_BREAK_CM => LB_INDIRECT_CM,
UC_LINE_BREAK_WJ => LB_PROHIBITED,
UC_LINE_BREAK_H2 => LB_DIRECT,
UC_LINE_BREAK_H3 => LB_DIRECT,
UC_LINE_BREAK_JL => LB_DIRECT,
UC_LINE_BREAK_JV => LB_DIRECT,
UC_LINE_BREAK_JT => LB_DIRECT),
UC_LINE_BREAK_WJ => array(UC_LINE_BREAK_OP => LB_INDIRECT,
UC_LINE_BREAK_CL => LB_PROHIBITED,
UC_LINE_BREAK_QU => LB_INDIRECT,
UC_LINE_BREAK_GL => LB_INDIRECT,
UC_LINE_BREAK_NS => LB_INDIRECT,
UC_LINE_BREAK_EX => LB_PROHIBITED,
UC_LINE_BREAK_SY => LB_PROHIBITED,
UC_LINE_BREAK_IS => LB_PROHIBITED,
UC_LINE_BREAK_PR => LB_INDIRECT,
UC_LINE_BREAK_PO => LB_INDIRECT,
UC_LINE_BREAK_NU => LB_INDIRECT,
UC_LINE_BREAK_AL => LB_INDIRECT,
UC_LINE_BREAK_ID => LB_INDIRECT,
UC_LINE_BREAK_IN => LB_INDIRECT,
UC_LINE_BREAK_HY => LB_INDIRECT,
UC_LINE_BREAK_BA => LB_INDIRECT,
UC_LINE_BREAK_BB => LB_INDIRECT,
UC_LINE_BREAK_B2 => LB_INDIRECT,
UC_LINE_BREAK_ZW => LB_PROHIBITED,
UC_LINE_BREAK_CM => LB_INDIRECT_CM,
UC_LINE_BREAK_WJ => LB_PROHIBITED,
UC_LINE_BREAK_H2 => LB_INDIRECT,
UC_LINE_BREAK_H3 => LB_INDIRECT,
UC_LINE_BREAK_JL => LB_INDIRECT,
UC_LINE_BREAK_JV => LB_INDIRECT,
UC_LINE_BREAK_JT => LB_INDIRECT),
UC_LINE_BREAK_H2 => array(UC_LINE_BREAK_OP => LB_DIRECT,
UC_LINE_BREAK_CL => LB_PROHIBITED,
UC_LINE_BREAK_QU => LB_INDIRECT,
UC_LINE_BREAK_GL => LB_INDIRECT,
UC_LINE_BREAK_NS => LB_INDIRECT,
UC_LINE_BREAK_EX => LB_PROHIBITED,
UC_LINE_BREAK_SY => LB_PROHIBITED,
UC_LINE_BREAK_IS => LB_PROHIBITED,
UC_LINE_BREAK_PR => LB_DIRECT,
UC_LINE_BREAK_PO => LB_INDIRECT,
UC_LINE_BREAK_NU => LB_DIRECT,
UC_LINE_BREAK_AL => LB_DIRECT,
UC_LINE_BREAK_ID => LB_DIRECT,
UC_LINE_BREAK_IN => LB_INDIRECT,
UC_LINE_BREAK_HY => LB_INDIRECT,
UC_LINE_BREAK_BA => LB_INDIRECT,
UC_LINE_BREAK_BB => LB_DIRECT,
UC_LINE_BREAK_B2 => LB_DIRECT,
UC_LINE_BREAK_ZW => LB_PROHIBITED,
UC_LINE_BREAK_CM => LB_INDIRECT_CM,
UC_LINE_BREAK_WJ => LB_PROHIBITED,
UC_LINE_BREAK_H2 => LB_DIRECT,
UC_LINE_BREAK_H3 => LB_DIRECT,
UC_LINE_BREAK_JL => LB_DIRECT,
UC_LINE_BREAK_JV => LB_INDIRECT,
UC_LINE_BREAK_JT => LB_INDIRECT),
UC_LINE_BREAK_H3 => array(UC_LINE_BREAK_OP => LB_DIRECT,
UC_LINE_BREAK_CL => LB_PROHIBITED,
UC_LINE_BREAK_QU => LB_INDIRECT,
UC_LINE_BREAK_GL => LB_INDIRECT,
UC_LINE_BREAK_NS => LB_INDIRECT,
UC_LINE_BREAK_EX => LB_PROHIBITED,
UC_LINE_BREAK_SY => LB_PROHIBITED,
UC_LINE_BREAK_IS => LB_PROHIBITED,
UC_LINE_BREAK_PR => LB_DIRECT,
UC_LINE_BREAK_PO => LB_INDIRECT,
UC_LINE_BREAK_NU => LB_DIRECT,
UC_LINE_BREAK_AL => LB_DIRECT,
UC_LINE_BREAK_ID => LB_DIRECT,
UC_LINE_BREAK_IN => LB_INDIRECT,
UC_LINE_BREAK_HY => LB_INDIRECT,
UC_LINE_BREAK_BA => LB_INDIRECT,
UC_LINE_BREAK_BB => LB_DIRECT,
UC_LINE_BREAK_B2 => LB_DIRECT,
UC_LINE_BREAK_ZW => LB_PROHIBITED,
UC_LINE_BREAK_CM => LB_INDIRECT_CM,
UC_LINE_BREAK_WJ => LB_PROHIBITED,
UC_LINE_BREAK_H2 => LB_DIRECT,
UC_LINE_BREAK_H3 => LB_DIRECT,
UC_LINE_BREAK_JL => LB_DIRECT,
UC_LINE_BREAK_JV => LB_DIRECT,
UC_LINE_BREAK_JT => LB_INDIRECT),
UC_LINE_BREAK_JL => array(UC_LINE_BREAK_OP => LB_DIRECT,
UC_LINE_BREAK_CL => LB_PROHIBITED,
UC_LINE_BREAK_QU => LB_INDIRECT,
UC_LINE_BREAK_GL => LB_INDIRECT,
UC_LINE_BREAK_NS => LB_INDIRECT,
UC_LINE_BREAK_EX => LB_PROHIBITED,
UC_LINE_BREAK_SY => LB_PROHIBITED,
UC_LINE_BREAK_IS => LB_PROHIBITED,
UC_LINE_BREAK_PR => LB_DIRECT,
UC_LINE_BREAK_PO => LB_INDIRECT,
UC_LINE_BREAK_NU => LB_DIRECT,
UC_LINE_BREAK_AL => LB_DIRECT,
UC_LINE_BREAK_ID => LB_DIRECT,
UC_LINE_BREAK_IN => LB_INDIRECT,
UC_LINE_BREAK_HY => LB_INDIRECT,
UC_LINE_BREAK_BA => LB_INDIRECT,
UC_LINE_BREAK_BB => LB_DIRECT,
UC_LINE_BREAK_B2 => LB_DIRECT,
UC_LINE_BREAK_ZW => LB_PROHIBITED,
UC_LINE_BREAK_CM => LB_INDIRECT_CM,
UC_LINE_BREAK_WJ => LB_PROHIBITED,
UC_LINE_BREAK_H2 => LB_INDIRECT,
UC_LINE_BREAK_H3 => LB_INDIRECT,
UC_LINE_BREAK_JL => LB_INDIRECT,
UC_LINE_BREAK_JV => LB_INDIRECT,
UC_LINE_BREAK_JT => LB_DIRECT),
UC_LINE_BREAK_JV => array(UC_LINE_BREAK_OP => LB_DIRECT,
UC_LINE_BREAK_CL => LB_PROHIBITED,
UC_LINE_BREAK_QU => LB_INDIRECT,
UC_LINE_BREAK_GL => LB_INDIRECT,
UC_LINE_BREAK_NS => LB_INDIRECT,
UC_LINE_BREAK_EX => LB_PROHIBITED,
UC_LINE_BREAK_SY => LB_PROHIBITED,
UC_LINE_BREAK_IS => LB_PROHIBITED,
UC_LINE_BREAK_PR => LB_DIRECT,
UC_LINE_BREAK_PO => LB_INDIRECT,
UC_LINE_BREAK_NU => LB_DIRECT,
UC_LINE_BREAK_AL => LB_DIRECT,
UC_LINE_BREAK_ID => LB_DIRECT,
UC_LINE_BREAK_IN => LB_INDIRECT,
UC_LINE_BREAK_HY => LB_INDIRECT,
UC_LINE_BREAK_BA => LB_INDIRECT,
UC_LINE_BREAK_BB => LB_DIRECT,
UC_LINE_BREAK_B2 => LB_DIRECT,
UC_LINE_BREAK_ZW => LB_PROHIBITED,
UC_LINE_BREAK_CM => LB_INDIRECT_CM,
UC_LINE_BREAK_WJ => LB_PROHIBITED,
UC_LINE_BREAK_H2 => LB_DIRECT,
UC_LINE_BREAK_H3 => LB_DIRECT,
UC_LINE_BREAK_JL => LB_DIRECT,
UC_LINE_BREAK_JV => LB_INDIRECT,
UC_LINE_BREAK_JT => LB_INDIRECT),
UC_LINE_BREAK_JT => array(UC_LINE_BREAK_OP => LB_DIRECT,
UC_LINE_BREAK_CL => LB_PROHIBITED,
UC_LINE_BREAK_QU => LB_INDIRECT,
UC_LINE_BREAK_GL => LB_INDIRECT,
UC_LINE_BREAK_NS => LB_INDIRECT,
UC_LINE_BREAK_EX => LB_PROHIBITED,
UC_LINE_BREAK_SY => LB_PROHIBITED,
UC_LINE_BREAK_IS => LB_PROHIBITED,
UC_LINE_BREAK_PR => LB_DIRECT,
UC_LINE_BREAK_PO => LB_INDIRECT,
UC_LINE_BREAK_NU => LB_DIRECT,
UC_LINE_BREAK_AL => LB_DIRECT,
UC_LINE_BREAK_ID => LB_DIRECT,
UC_LINE_BREAK_IN => LB_INDIRECT,
UC_LINE_BREAK_HY => LB_INDIRECT,
UC_LINE_BREAK_BA => LB_INDIRECT,
UC_LINE_BREAK_BB => LB_DIRECT,
UC_LINE_BREAK_B2 => LB_DIRECT,
UC_LINE_BREAK_ZW => LB_PROHIBITED,
UC_LINE_BREAK_CM => LB_INDIRECT_CM,
UC_LINE_BREAK_WJ => LB_PROHIBITED,
UC_LINE_BREAK_H2 => LB_DIRECT,
UC_LINE_BREAK_H3 => LB_DIRECT,
UC_LINE_BREAK_JL => LB_DIRECT,
UC_LINE_BREAK_JV => LB_DIRECT,
UC_LINE_BREAK_JT => LB_INDIRECT));
/**
 * Base class for builders that turn raw text into inline content.
 *
 * Provides the shared text utilities: white-space normalisation, splitting
 * into lines, and splitting into "words" at the line break opportunities
 * computed with a pair-table implementation of the Unicode line breaking
 * algorithm.
 *
 * See CSS 2.1 16.6.1 The 'white-space' processing model
 */
class InlineContentBuilder {
    /**
     * Old-style (class-named) constructor; intentionally does nothing.
     */
    function InlineContentBuilder() {
    }

    /**
     * Appends a forced line break box to $box.
     *
     * @param object $box      Container receiving the new child (via add_child()).
     * @param object $pipeline Supplies the current CSS state for the new box.
     */
    function add_line_break(&$box, &$pipeline) {
        // Plain assignment: assigning the result of "new" by reference
        // ("=& new") is a parse error on PHP 7 and later.
        $break_box = new BRBox();
        $break_box->readCSS($pipeline->get_current_css_state());
        $box->add_child($break_box);
    }

    /**
     * Placeholder for the actual content building.
     *
     * This base implementation only calls error_no_method() with the method
     * name and the run-time class name; presumably concrete builders are
     * expected to override it.
     *
     * @param object $box      Target box.
     * @param string $text     Raw text content.
     * @param object $pipeline Current pipeline.
     */
    function build(&$box, $text, &$pipeline) {
        error_no_method('build', get_class($this));
    }

    /**
     * Splits content at every single CR or LF character.
     *
     * Note that each character is a separator on its own, so a CR LF pair
     * produces an empty element between the two neighbouring lines.
     *
     * @param string $content UTF-8 text (the pattern uses the /u modifier).
     * @return array List of line strings as returned by preg_split().
     */
    function break_into_lines($content) {
        return preg_split('/[\r\n]/u', $content);
    }

    /**
     * Splits content into chunks ending at each line break opportunity.
     *
     * @param string $content UTF-8 text; surrounding whitespace is trimmed first.
     * @return array List of trimmed word strings; empty array for blank input.
     */
    function break_into_words($content) {
        $content = trim($content);
        if ($content === '') {
            return array();
        }

        // Extract Unicode characters from the raw content data.
        // The loop relies on get_next_utf8_char() advancing $ptr.
        $ptr = 0;
        $utf8_chars = array();
        $ucs2_chars = array();
        $size = strlen($content);
        while ($ptr < $size) {
            $utf8_char = ManagerEncoding::get_next_utf8_char($content, $ptr);
            $utf8_chars[] = $utf8_char;
            $ucs2_chars[] = utf8_to_code($utf8_char);
        }

        // Get unicode line breaking classes
        $classes = array_map(array($this, 'get_line_break_class'), $ucs2_chars);

        // Initialised explicitly so that count() below always receives an
        // array, even if find_line_break() returns before writing anything.
        $breaks = array();
        $this->find_line_break($classes, $breaks, count($classes));

        // Make words array: accumulate characters until a position where a
        // break is allowed (or forced), then flush the accumulated word.
        $words = array();
        $word = '';
        for ($i = 0, $size = count($breaks); $i < $size; $i++) {
            $word .= $utf8_chars[$i];

            $break = $breaks[$i];
            if ($break == LB_INDIRECT ||
                $break == LB_INDIRECT_CM ||
                $break == LB_DIRECT ||
                $break == LB_EXPLICIT) {
                $words[] = trim($word);
                $word = '';
            }
        }

        return $words;
    }

    /**
     * Stub for break analysis inside a run of complex-script (SA) characters.
     *
     * Marks the position before every visited character as "no break" and
     * stops at the first character whose class is not SA.
     *
     * @param int   $current_class Class of the 'before' character (unused here).
     * @param array $classes       Line break classes of all characters.
     * @param array $breaks        Break actions, updated in place.
     * @param int   $offset        Index to start scanning from.
     * @param int   $length        Number of entries in $classes.
     * @return int 0 when $offset is past the end; otherwise the index at
     *             which scanning stopped ($length if the run reached the end).
     */
    function find_complex_break($current_class, $classes, &$breaks, $offset, $length) {
        if ($offset >= $length) {
            return 0;
        }

        for ($i = $offset; $i < $length; $i++) {
            // TODO
            $breaks[$i - 1] = LB_PROHIBITED;
            if ($classes[$i] != UC_LINE_BREAK_SA) {
                break;
            }
        }

        return $i;
    }

    /**
     * Computes break actions for a sequence of line break classes, up to and
     * including the first hard break.
     *
     * $breaks[$k] receives the action that applies after character $k. The
     * last processed position always receives LB_EXPLICIT.
     *
     * @param array $classes Line break classes, one per character.
     * @param array $breaks  Output: break actions, filled in place.
     * @param int   $length  Number of entries in $classes.
     * @return int Number of characters processed (0 for empty input).
     */
    function find_line_break($classes, &$breaks, $length) {
        if (!$length) {
            return 0;
        }

        $class = $classes[0]; // class of 'before' character

        // Treat a leading LF / NL like a hard break (BK) so that the loop
        // condition below stops immediately.
        if ($class == UC_LINE_BREAK_LF ||
            $class == UC_LINE_BREAK_NL) {
            $class = UC_LINE_BREAK_BK;
        }

        // loop over all pairs in the string up to a hard break
        for ($i = 1; ($i < $length) && ($class != UC_LINE_BREAK_BK); $i++) {
            // handle explicit breaks here

            // handle BK, NL and LF explicitly
            if ($classes[$i] == UC_LINE_BREAK_BK ||
                $classes[$i] == UC_LINE_BREAK_NL ||
                $classes[$i] == UC_LINE_BREAK_LF) {
                $breaks[$i - 1] = LB_PROHIBITED;
                $class = UC_LINE_BREAK_BK;
                continue;
            }

            // handle CR explicitly
            if ($classes[$i] == UC_LINE_BREAK_CR) {
                $breaks[$i - 1] = LB_PROHIBITED;
                $class = UC_LINE_BREAK_CR;
                continue;
            }

            // handle spaces explicitly: never break before a space and keep
            // the class of the last non-space character as 'before' class
            if ($classes[$i] == UC_LINE_BREAK_SP) {
                $breaks[$i - 1] = LB_PROHIBITED;
                continue;
            }

            // handle complex scripts in a separate function
            // NOTE(review): find_complex_break() returns an index counted from
            // $offset, yet it is added to $i here. This branch is not reached
            // for classes produced by get_line_break_class(), which maps SA to
            // AL - confirm the intended arithmetic before relying on it.
            if ($classes[$i] == UC_LINE_BREAK_SA) {
                $i += $this->find_complex_break($class, $classes, $breaks, $i, $length);
                if ($i < $length) {
                    $class = $classes[$i];
                    continue;
                }
            }

            // lookup pair table information
            $current_class = $classes[$i];
            $break = $GLOBALS['_g_line_break_class_table'][$class][$current_class];
            $breaks[$i - 1] = $break;

            if ($break == LB_INDIRECT) {
                // an indirect break is only allowed if spaces intervene
                if ($classes[$i - 1] == UC_LINE_BREAK_SP) {
                    $breaks[$i - 1] = LB_INDIRECT;
                } else {
                    $breaks[$i - 1] = LB_PROHIBITED;
                }
            } elseif ($break == LB_INDIRECT_CM) {
                // handle breaks involving a combining mark
                $breaks[$i - 1] = LB_PROHIBITED;
                if ($classes[$i - 1] == UC_LINE_BREAK_SP) {
                    $breaks[$i - 1] = LB_INDIRECT_CM;
                } else {
                    continue; // do not update cls
                }
            } elseif ($break == LB_PROHIBITED_CM) {
                $breaks[$i - 1] = LB_PROHIBITED_CM;
                if ($classes[$i - 1] != UC_LINE_BREAK_SP) {
                    continue; // do not update cls
                }
            }

            // save cls of 'before' character (unless bypassed by 'continue')
            $class = $classes[$i];
        }

        // always break at the end of the processed range
        $breaks[$i - 1] = LB_EXPLICIT;

        return $i;
    }

    /**
     * Always reports that a break is allowed; the arguments are ignored.
     *
     * @param int $previous_class Unused.
     * @param int $current_class  Unused.
     * @return bool Always true.
     */
    function is_break_allowed($previous_class, $current_class) {
        return true;
    }

    /**
     * Returns the line break class of a code point.
     *
     * The class is read as a single byte located at offset $ucs2_char of the
     * class table file and memoised in a static cache.
     *
     * @param int $ucs2_char Code point, used directly as file offset.
     * @return int Line break class code, with AI, SA, SG and XX mapped to AL.
     */
    function get_line_break_class($ucs2_char) {
        static $class_cache = array();

        if (!isset($class_cache[$ucs2_char])) {
            $table_handle = $this->get_line_break_class_table_handle();
            fseek($table_handle, $ucs2_char /* as integer */, SEEK_SET);
            $class_cache[$ucs2_char] = ord(fread($table_handle, 1));
        }

        // Apply rule LB1 from the Unicode algorithm:
        //
        // Assign a line breaking class to each code point of the
        // input. Resolve AI, CB, SA, SG, and XX into other line breaking
        // classes depending on criteria outside the scope of this
        // algorithm.
        //
        // In the absence of such criteria, it is recommended that classes
        // AI, SA, SG, and XX be resolved to AL, except that characters of
        // class SA that have General_Category Mn or Mc be resolved to CM
        // (see SA). Unresolved class CB is handled in rule LB20.

        // Resolve AI, SA, SG, and XX to AL
        if (in_array($class_cache[$ucs2_char],
                     array(UC_LINE_BREAK_AI,
                           UC_LINE_BREAK_SA,
                           UC_LINE_BREAK_SG,
                           UC_LINE_BREAK_XX))) {
            return UC_LINE_BREAK_AL;
        }

        return $class_cache[$ucs2_char];
    }

    /**
     * Returns a read handle on the binary class table, opening it on first use.
     *
     * The table file lives in CACHE_DIR and is generated on demand when it
     * does not exist. The handle is kept in a static variable and is not
     * closed by this class; a shared lock is taken on it when it is opened.
     *
     * @return resource File handle opened in 'rb' mode.
     */
    function get_line_break_class_table_handle() {
        static $table_handle = null;

        if (is_null($table_handle)) {
            $filename = CACHE_DIR.'unicode.lb.classes.dat';

            if (!file_exists($filename)) {
                $this->generate_line_break_class_table($filename);
            }

            $table_handle = fopen($filename, 'rb');
            flock($table_handle, LOCK_SH);
        }

        return $table_handle;
    }

    /**
     * Builds the binary class table from HTML2PS_DIR/data/LineBreak.txt.
     *
     * The output holds one byte per code point (byte offset == code point);
     * code points not mentioned in the data file get the value 0. Input lines
     * are either "XXXX;CC # ..." or "XXXX..YYYY;CC # ..."; blank lines and
     * lines starting with '#' are skipped. Any other line is dumped and the
     * script is terminated.
     *
     * @param string $output_filename Path of the table file to write.
     */
    function generate_line_break_class_table($output_filename) {
        // Numeric code stored in the table for every two-letter class name.
        $class_codes = array('BK' => 1,
                             'CR' => 2,
                             'LF' => 3,
                             'CM' => 4,
                             'NL' => 5,
                             'SG' => 6,
                             'WJ' => 7,
                             'ZW' => 8,
                             'GL' => 9,
                             'SP' => 10,
                             'B2' => 11,
                             'BA' => 12,
                             'BB' => 13,
                             'HY' => 14,
                             'CB' => 15,
                             'CL' => 16,
                             'EX' => 17,
                             'IN' => 18,
                             'NS' => 19,
                             'OP' => 20,
                             'QU' => 21,
                             'IS' => 22,
                             'NU' => 23,
                             'PO' => 24,
                             'PR' => 25,
                             'SY' => 26,
                             'AI' => 27,
                             'AL' => 28,
                             'H2' => 29,
                             'H3' => 30,
                             'ID' => 31,
                             'JL' => 32,
                             'JV' => 33,
                             'JT' => 34,
                             'SA' => 35,
                             'XX' => 36);

        $output_handle = fopen($output_filename, 'wb');
        flock($output_handle, LOCK_EX);

        $input_handle = fopen(HTML2PS_DIR.'/data/LineBreak.txt', 'r');

        // Last code point already written. Starts at -1 (nothing written yet)
        // so that a data file whose first entry is not code point 0 is padded
        // with the right number of zero bytes and offsets stay aligned.
        $last_position = -1;

        while ($line = fgets($input_handle)) {
            $line = trim($line);
            if (strlen($line) == 0 || $line[0] == '#') {
                continue;
            }

            if (preg_match('/^([0-9a-f]+);(\w\w) #/i', $line, $matches)) {
                // single code point
                $unicode_position = hexdec($matches[1]);
                $class = $matches[2];

                // zero-fill the gap since the previously written code point
                if ($unicode_position > $last_position + 1) {
                    fwrite($output_handle, str_repeat(chr(0), $unicode_position - $last_position - 1));
                }

                fwrite($output_handle, chr($class_codes[$class]));
                $last_position = $unicode_position;
            } elseif (preg_match('/^([0-9a-f]+)\.\.([0-9a-f]+);(\w\w) #/i', $line, $matches)) {
                // inclusive range of code points sharing one class
                $unicode_start_position = hexdec($matches[1]);
                $unicode_end_position = hexdec($matches[2]);
                $class = $matches[3];

                if ($unicode_start_position > $last_position + 1) {
                    fwrite($output_handle, str_repeat(chr(0), $unicode_start_position - $last_position - 1));
                }

                fwrite($output_handle, str_repeat(chr($class_codes[$class]), $unicode_end_position - $unicode_start_position + 1));
                $last_position = $unicode_end_position;
            } else {
                // unrecognised line format: dump it and abort
                var_dump($line); die();
            }
        }

        fclose($input_handle);

        flock($output_handle, LOCK_UN);
        fclose($output_handle);
    }

    /**
     * Replaces every run of CR, LF, TAB and space characters by one space.
     *
     * @param string $content UTF-8 text.
     * @return string Result of preg_replace().
     */
    function collapse_whitespace($content) {
        return preg_replace('/[\r\n\t ]+/u', ' ', $content);
    }

    /**
     * Removes line feeds (CR/LF) at the very beginning of the content,
     * together with any spaces that precede them.
     *
     * @param string $content UTF-8 text.
     * @return string Result of preg_replace().
     */
    function remove_leading_linefeeds($content) {
        return preg_replace('/^ *[\r\n]+/u', '', $content);
    }

    /**
     * Removes CR/LF characters at the very end of the content.
     *
     * @param string $content UTF-8 text.
     * @return string Result of preg_replace().
     */
    function remove_trailing_linefeeds($content) {
        return preg_replace('/[\r\n]+$/u', '', $content);
    }
}
?>