/* Convert pdf to txt file (where var names are within []) */
* Load the text export of the CRF, keeping the first tab-delimited field of each record;
data crf;
  * widen this if any line of the text file is longer;
  attrib col1 length=$2000;
  infile 'P:\XX\XX\XX\XX\XX\XX\CRF_AS_TXT_FILE.txt' dsd delimiter='09'x;
  input col1 $;
  * keep the step iteration number so the read order can be restored after sorting;
  line = _n_;
run;
* Reverse the read order so that a page footer (at the bottom of each page);
* is encountered before the lines of that page in the next step;
* DESCENDING must be spelled out in full, otherwise it is taken as a variable name;
proc sort data=crf out=crf1;
  by descending line;
run;
* Walk the reversed file, carrying the latest page text onto each row and flagging rows with bracketed names;
data crf2;
  retain pageno;
  set crf1;
  * any row containing the text Page replaces the remembered page value;
  if index(col1, "Page") then pageno = col1;
  * a row is flagged only when it holds both an opening and a closing bracket;
  if index(col1, "[") then do;
    if index(col1, "]") then flag = "Y";
  end;
run;
* Put the rows back into their original read order;
proc sort data=crf2
          out=crf3;
  by line;
run;
* Carry the module title forward and count the closing brackets on each row;
data crf4;
  retain module;
  set crf3;
  * a row containing the title marker replaces the remembered module value;
  if index(col1, "XX_TITLE:") then module = col1;
  * number of closing brackets on the row, never less than 1;
  * the floor of 1 keeps the array declared later from being empty;
  findmax = max(countc(col1, ']'), 1);
run;
* Store the largest per-row bracket count in macro variable maxn, without padding blanks;
proc sql noprint;
  select max(findmax) into :maxn trimmed
    from crf4;
quit;
/*
  Extract every [NAME] token from the flagged rows into variable1-variable&maxn.
  (upper-cased) and derive a numeric page from the carried page text.

  Output: one row per flagged input row, keeping line, module, page and the
  variable1-variableN columns; all working variables are dropped.

  Hardening compared with a plain "find first [ and first ]" scan:
    - the closing bracket is searched for only AFTER the opening bracket, so a
      stray ] ahead of the next [ no longer produces a negative SUBSTR length;
    - the scan stops as soon as no complete [ ... ] pair remains, so leftover
      text in front of an unmatched ] is never stored as a variable name;
    - empty brackets [] are skipped instead of calling SUBSTR with length 0;
    - the remainder is blanked when ] is the last byte of col1, instead of
      calling SUBSTR with a start position past the end of the string.
*/
data varname(drop=col1 flag findmax i pos1 pos2 diff pageno);
  set crf4;
  *only bring in raw var names;
  where flag = "Y";
  array varname {*} $40 variable1-variable&maxn.;
  do i = 1 to dim(varname);
    *locate the next opening bracket and stop when none is left;
    pos1 = find(col1, '[');
    if pos1 = 0 then leave;
    *locate the matching closing bracket strictly after the opening one;
    pos2 = find(col1, ']', pos1 + 1);
    if pos2 = 0 then leave;
    *number of characters between the two brackets;
    diff = pos2 - (pos1 + 1);
    if diff > 0 then varname{i} = upcase(substr(col1, pos1 + 1, diff));
    *continue scanning with whatever follows the closing bracket;
    if pos2 < lengthc(col1) then col1 = substr(col1, pos2 + 1);
    else col1 = ' ';
  end;
  *get pageno as numeric by reading the second word of the page text;
  page = input(scan(pageno, 2), best.);
run;
* Turn the wide variable1-variableN columns into one row per extracted name;
* the transposed value column col1 is renamed to variable;
proc transpose data=varname
               out=varname2(rename=(col1=variable));
  var variable:;
  by line page module;
run;
* Final lookup with one row per page and variable name, ignoring unused blank slots;
proc sort data=varname2(where=(variable ne ""))
          out=crf_page(keep=page module variable)
          nodupkey;
  by page variable;
run;
/*
**************CHECK ALL RAW VARS*************;
proc sql;
create table check1 as
select name as variable length=2000, memname
from sashelp.vcolumn
where libname= "RAW";
quit;
proc sort data=check1 out=check2 nodupkey;
by variable;
run;
proc sort data=crf_page out=crf_temp;
by variable;
run;
data check3;
merge check2 crf_temp(in=a);
by variable;
if a then origin= "CRF";
last= substr(reverse(trim(variable)), 1, 1);
run;
data check4;
set check3;
where origin ne "CRF" and last ne "L";
run;*/