/* Convert the PDF to a TXT file first (raw variable names appear within [ ]) */

/* Read the CRF text export one record per line and remember the original line order. */
data crf;
  * widen this length if any line of the export is longer than 2000 characters;
  attrib col1 length=$2000;

  infile 'P:\XX\XX\XX\XX\XX\XX\CRF_AS_TXT_FILE.txt' dsd dlm='09'x;
  input col1 $;

  * record counter, used later as the sort key to restore file order;
  line = _n_;
run;

/* Reverse the file order so that the page number printed at the bottom of each
   page is read before the lines that belong to that page.
   Fix: the BY option keyword is DESCENDING. The former spelling was parsed as
   a variable name, which stopped the step with a variable-not-found error. */
proc sort data=crf out=crf1;
  by descending line;
run;

/* Attach the most recently seen page line to every record and flag records
   that contain both bracket characters. */
data crf2;
  retain pageno;
  set crf1;

  * input arrives bottom-up, so a page line is seen before the lines above it;
  if find(col1, "Page") then pageno = col1;

  * both bracket characters must be present for the record to be flagged;
  if find(col1, "[") and find(col1, "]") then flag = "Y";
run;

/* Put the records back into original file order (ascending line number). */
proc sort out=crf3 data=crf2;
  by line;
run;

/* Carry the module title line forward and count closing brackets per record. */
data crf4;
  retain module;
  set crf3;

  * a record containing the title marker becomes the module for following records;
  if find(col1, "XX_TITLE:") then module = col1;

  * number of closing brackets in the record, never less than 1;
  findmax = max(countc(col1, ']'), 1);
run;

/* Store the largest per-record bracket count in macro variable MAXN. */
proc sql noprint;
  select max(findmax) into :maxn trimmed
    from crf4;
quit;

/* Pull every bracketed name out of the flagged records into variable1-variable&maxn.
   and derive a numeric page number from the retained page line.

   Changes versus the previous version of this step:
   - the closing bracket is searched only to the right of the opening bracket, so a
     stray closing bracket earlier in the record no longer gives a negative length
   - an empty pair of brackets is skipped instead of calling SUBSTR with length 0
   - the loop stops as soon as no complete pair is left in the record
   - the remainder is blanked when the closing bracket is the last character, so
     SUBSTR is never asked to start past the end of the string */
data varname(drop=col1 flag findmax i pos1 pos2 diff pageno check);
  set crf4;

  * only records flagged as containing bracketed names;
  where flag = "Y";

  array varname {*} $40 variable1-variable&maxn.;

  do i = 1 to &maxn. by 1;
    pos1 = find(col1, '[');

    * look for the closing bracket only after the opening one;
    if pos1 > 0 then pos2 = find(col1, ']', pos1 + 1);
    else pos2 = 0;

    * check is 1 when a complete pair was found and 0 otherwise;
    check = (pos1 > 0 and pos2 > 0);
    if not check then leave;

    * number of characters strictly between the two brackets;
    diff = pos2 - (pos1 + 1);
    if diff > 0 then varname{i} = upcase(substr(col1, pos1 + 1, diff));

    * continue scanning with the text that follows the closing bracket;
    if pos2 < length(col1) then col1 = substr(col1, pos2 + 1);
    else col1 = ' ';
  end;

  * second word of the retained page line converted to a number;
  page = input(scan(pageno, 2), best.);
run;

/* Turn the variable1-variableN columns into one row per extracted name. */
proc transpose
    data=varname
    out=varname2(rename=(col1=variable));
  by line page module;
  var variable: ;
run;

/* Final lookup: one row per page and variable name, blank names removed. */
proc sort
    data=varname2(where=(variable ne ""))
    out=crf_page(keep=page module variable)
    nodupkey;
  by page variable;
run;

/*

**************CHECK ALL RAW VARS*************;

proc sql;

create table check1 as

select name as variable length=2000, memname

from sashelp.vcolumn

where libname= "RAW";

quit;

proc sort data=check1 out=check2 nodupkey;

  by variable;

run;

proc sort data=crf_page out=crf_temp;

  by variable;

run;

data check3;

merge check2 crf_temp(in=a);

by variable;

if a then origin= "CRF";

last= substr(reverse(trim(variable)), 1, 1);

run;

data check4;

  set check3;

  where origin ne "CRF" and last ne "L";

run;*/

Previous
Previous

Decimal Align Macro